author | Stanislav Kirillov <staskirillov@gmail.com> | 2022-02-10 16:46:07 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:07 +0300
commit | 92fe2b1e7bc79f7b95adef61714fc003f6ea4a1c (patch)
tree | 817034f4ca57c9f841bb047ec94630c2e78a2b1d /contrib/libs/tbb/include
parent | 53c76da6d9f6cc5a17f6029df396f0e3bc1ff47d (diff)
download | ydb-92fe2b1e7bc79f7b95adef61714fc003f6ea4a1c.tar.gz
Restoring authorship annotation for Stanislav Kirillov <staskirillov@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/tbb/include')
129 files changed, 31998 insertions, 31998 deletions
diff --git a/contrib/libs/tbb/include/oneapi/tbb.h b/contrib/libs/tbb/include/oneapi/tbb.h index 1ca41dc516..b51eaf90f6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb.h +++ b/contrib/libs/tbb/include/oneapi/tbb.h @@ -1,73 +1,73 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_tbb_H -#define __TBB_tbb_H - -/** - This header bulk-includes declarations or definitions of all the functionality - provided by TBB (save for tbbmalloc and 3rd party dependent headers). - - If you use only a few TBB constructs, consider including specific headers only. - Any header listed below can be included independently of others. -**/ - -#include "oneapi/tbb/blocked_range.h" -#include "oneapi/tbb/blocked_range2d.h" -#include "oneapi/tbb/blocked_range3d.h" -#if TBB_PREVIEW_BLOCKED_RANGE_ND -#include "tbb/blocked_rangeNd.h" -#endif -#include "oneapi/tbb/cache_aligned_allocator.h" -#include "oneapi/tbb/combinable.h" -#include "oneapi/tbb/concurrent_hash_map.h" -#if TBB_PREVIEW_CONCURRENT_LRU_CACHE -#include "tbb/concurrent_lru_cache.h" -#endif -#include "oneapi/tbb/concurrent_priority_queue.h" -#include "oneapi/tbb/concurrent_queue.h" -#include "oneapi/tbb/concurrent_unordered_map.h" -#include "oneapi/tbb/concurrent_unordered_set.h" -#include "oneapi/tbb/concurrent_map.h" -#include "oneapi/tbb/concurrent_set.h" -#include "oneapi/tbb/concurrent_vector.h" -#include "oneapi/tbb/enumerable_thread_specific.h" -#include "oneapi/tbb/flow_graph.h" -#include "oneapi/tbb/global_control.h" -#include "oneapi/tbb/info.h" -#include "oneapi/tbb/null_mutex.h" -#include "oneapi/tbb/null_rw_mutex.h" -#include "oneapi/tbb/parallel_for.h" -#include "oneapi/tbb/parallel_for_each.h" -#include "oneapi/tbb/parallel_invoke.h" -#include "oneapi/tbb/parallel_pipeline.h" -#include "oneapi/tbb/parallel_reduce.h" -#include "oneapi/tbb/parallel_scan.h" -#include "oneapi/tbb/parallel_sort.h" -#include "oneapi/tbb/partitioner.h" -#include "oneapi/tbb/queuing_mutex.h" -#include "oneapi/tbb/queuing_rw_mutex.h" -#include "oneapi/tbb/spin_mutex.h" -#include "oneapi/tbb/spin_rw_mutex.h" -#include "oneapi/tbb/task.h" -#include "oneapi/tbb/task_arena.h" -#include "oneapi/tbb/task_group.h" -#include "oneapi/tbb/task_scheduler_observer.h" -#include "oneapi/tbb/tbb_allocator.h" -#include "oneapi/tbb/tick_count.h" -#include "oneapi/tbb/version.h" - -#endif /* __TBB_tbb_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_tbb_H +#define __TBB_tbb_H + +/** + This header bulk-includes declarations or definitions of all the functionality + provided by TBB (save for tbbmalloc and 3rd party dependent headers). + + If you use only a few TBB constructs, consider including specific headers only. + Any header listed below can be included independently of others. +**/ + +#include "oneapi/tbb/blocked_range.h" +#include "oneapi/tbb/blocked_range2d.h" +#include "oneapi/tbb/blocked_range3d.h" +#if TBB_PREVIEW_BLOCKED_RANGE_ND +#include "tbb/blocked_rangeNd.h" +#endif +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/combinable.h" +#include "oneapi/tbb/concurrent_hash_map.h" +#if TBB_PREVIEW_CONCURRENT_LRU_CACHE +#include "tbb/concurrent_lru_cache.h" +#endif +#include "oneapi/tbb/concurrent_priority_queue.h" +#include "oneapi/tbb/concurrent_queue.h" +#include "oneapi/tbb/concurrent_unordered_map.h" +#include "oneapi/tbb/concurrent_unordered_set.h" +#include "oneapi/tbb/concurrent_map.h" +#include "oneapi/tbb/concurrent_set.h" +#include "oneapi/tbb/concurrent_vector.h" +#include "oneapi/tbb/enumerable_thread_specific.h" +#include "oneapi/tbb/flow_graph.h" +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/info.h" +#include "oneapi/tbb/null_mutex.h" +#include "oneapi/tbb/null_rw_mutex.h" +#include "oneapi/tbb/parallel_for.h" +#include "oneapi/tbb/parallel_for_each.h" +#include "oneapi/tbb/parallel_invoke.h" +#include "oneapi/tbb/parallel_pipeline.h" +#include "oneapi/tbb/parallel_reduce.h" +#include "oneapi/tbb/parallel_scan.h" +#include "oneapi/tbb/parallel_sort.h" +#include "oneapi/tbb/partitioner.h" +#include "oneapi/tbb/queuing_mutex.h" +#include "oneapi/tbb/queuing_rw_mutex.h" +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/spin_rw_mutex.h" +#include "oneapi/tbb/task.h" +#include "oneapi/tbb/task_arena.h" +#include "oneapi/tbb/task_group.h" +#include "oneapi/tbb/task_scheduler_observer.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/tick_count.h" +#include "oneapi/tbb/version.h" + +#endif /* __TBB_tbb_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h index f6612fb4e3..f69e8bb3fe 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h @@ -1,163 +1,163 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_blocked_range_H -#define __TBB_blocked_range_H - -#include <cstddef> - -#include "detail/_range_common.h" -#include "detail/_namespace_injection.h" - -#include "version.h" - -namespace tbb { -namespace detail { -namespace d1 { - -/** \page range_req Requirements on range concept - Class \c R implementing the concept of range must define: - - \code R::R( const R& ); \endcode Copy constructor - - \code R::~R(); \endcode Destructor - - \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges - - \code bool R::empty() const; \endcode True if range is empty - - \code R::R( R& r, split ); \endcode Split range \c r into two subranges. -**/ - -//! A range over which to iterate. -/** @ingroup algorithms */ -template<typename Value> -class blocked_range { -public: - //! Type of a value - /** Called a const_iterator for sake of algorithms that need to treat a blocked_range - as an STL container. */ - using const_iterator = Value; - - //! Type for size of a range - using size_type = std::size_t; - - //! Construct range over half-open interval [begin,end), with the given grainsize. - blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : - my_end(end_), my_begin(begin_), my_grainsize(grainsize_) - { - __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" ); - } - - //! Beginning of range. - const_iterator begin() const { return my_begin; } - - //! One past last value in range. - const_iterator end() const { return my_end; } - - //! Size of the range - /** Unspecified if end()<begin(). */ - size_type size() const { - __TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" ); - return size_type(my_end-my_begin); - } - - //! The grain size for this range. - size_type grainsize() const { return my_grainsize; } - - //------------------------------------------------------------------------ - // Methods that implement Range concept - //------------------------------------------------------------------------ - - //! True if range is empty. - bool empty() const { return !(my_begin<my_end); } - - //! True if range is divisible. - /** Unspecified if end()<begin(). */ - bool is_divisible() const { return my_grainsize<size(); } - - //! Split range. - /** The new Range *this has the second part, the old range r has the first part. - Unspecified if end()<begin() or !is_divisible(). */ - blocked_range( blocked_range& r, split ) : - my_end(r.my_end), - my_begin(do_split(r, split())), - my_grainsize(r.my_grainsize) - { - // only comparison 'less than' is required from values of blocked_range objects - __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); - } - - //! Split range. - /** The new Range *this has the second part split according to specified proportion, the old range r has the first part. - Unspecified if end()<begin() or !is_divisible(). */ - blocked_range( blocked_range& r, proportional_split& proportion ) : - my_end(r.my_end), - my_begin(do_split(r, proportion)), - my_grainsize(r.my_grainsize) - { - // only comparison 'less than' is required from values of blocked_range objects - __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); - } - -private: - /** NOTE: my_end MUST be declared before my_begin, otherwise the splitting constructor will break. */ - Value my_end; - Value my_begin; - size_type my_grainsize; - - //! Auxiliary function used by the splitting constructor. 
- static Value do_split( blocked_range& r, split ) - { - __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); - Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u; - r.my_end = middle; - return middle; - } - - static Value do_split( blocked_range& r, proportional_split& proportion ) - { - __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); - - // usage of 32-bit floating point arithmetic is not enough to handle ranges of - // more than 2^24 iterations accurately. However, even on ranges with 2^64 - // iterations the computational error approximately equals to 0.000001% which - // makes small impact on uniform distribution of such range's iterations (assuming - // all iterations take equal time to complete). See 'test_partitioner_whitebox' - // for implementation of an exact split algorithm - size_type right_part = size_type(float(r.size()) * float(proportion.right()) - / float(proportion.left() + proportion.right()) + 0.5f); - return r.my_end = Value(r.my_end - right_part); - } - - template<typename RowValue, typename ColValue> - friend class blocked_range2d; - - template<typename RowValue, typename ColValue, typename PageValue> - friend class blocked_range3d; - - template<typename DimValue, unsigned int N, typename> - friend class blocked_rangeNd_impl; -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::blocked_range; -// Split types -using detail::split; -using detail::proportional_split; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_blocked_range_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range_H +#define __TBB_blocked_range_H + +#include <cstddef> + +#include "detail/_range_common.h" +#include "detail/_namespace_injection.h" + +#include "version.h" + +namespace tbb { +namespace detail { +namespace d1 { + +/** \page range_req Requirements on range concept + Class \c R implementing the concept of range must define: + - \code R::R( const R& ); \endcode Copy constructor + - \code R::~R(); \endcode Destructor + - \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges + - \code bool R::empty() const; \endcode True if range is empty + - \code R::R( R& r, split ); \endcode Split range \c r into two subranges. +**/ + +//! A range over which to iterate. +/** @ingroup algorithms */ +template<typename Value> +class blocked_range { +public: + //! Type of a value + /** Called a const_iterator for sake of algorithms that need to treat a blocked_range + as an STL container. */ + using const_iterator = Value; + + //! Type for size of a range + using size_type = std::size_t; + + //! Construct range over half-open interval [begin,end), with the given grainsize. + blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : + my_end(end_), my_begin(begin_), my_grainsize(grainsize_) + { + __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" ); + } + + //! 
Beginning of range. + const_iterator begin() const { return my_begin; } + + //! One past last value in range. + const_iterator end() const { return my_end; } + + //! Size of the range + /** Unspecified if end()<begin(). */ + size_type size() const { + __TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" ); + return size_type(my_end-my_begin); + } + + //! The grain size for this range. + size_type grainsize() const { return my_grainsize; } + + //------------------------------------------------------------------------ + // Methods that implement Range concept + //------------------------------------------------------------------------ + + //! True if range is empty. + bool empty() const { return !(my_begin<my_end); } + + //! True if range is divisible. + /** Unspecified if end()<begin(). */ + bool is_divisible() const { return my_grainsize<size(); } + + //! Split range. + /** The new Range *this has the second part, the old range r has the first part. + Unspecified if end()<begin() or !is_divisible(). */ + blocked_range( blocked_range& r, split ) : + my_end(r.my_end), + my_begin(do_split(r, split())), + my_grainsize(r.my_grainsize) + { + // only comparison 'less than' is required from values of blocked_range objects + __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); + } + + //! Split range. + /** The new Range *this has the second part split according to specified proportion, the old range r has the first part. + Unspecified if end()<begin() or !is_divisible(). */ + blocked_range( blocked_range& r, proportional_split& proportion ) : + my_end(r.my_end), + my_begin(do_split(r, proportion)), + my_grainsize(r.my_grainsize) + { + // only comparison 'less than' is required from values of blocked_range objects + __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); + } + +private: + /** NOTE: my_end MUST be declared before my_begin, otherwise the splitting constructor will break. */ + Value my_end; + Value my_begin; + size_type my_grainsize; + + //! Auxiliary function used by the splitting constructor. + static Value do_split( blocked_range& r, split ) + { + __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); + Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u; + r.my_end = middle; + return middle; + } + + static Value do_split( blocked_range& r, proportional_split& proportion ) + { + __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); + + // usage of 32-bit floating point arithmetic is not enough to handle ranges of + // more than 2^24 iterations accurately. However, even on ranges with 2^64 + // iterations the computational error approximately equals to 0.000001% which + // makes small impact on uniform distribution of such range's iterations (assuming + // all iterations take equal time to complete). 
See 'test_partitioner_whitebox' + // for implementation of an exact split algorithm + size_type right_part = size_type(float(r.size()) * float(proportion.right()) + / float(proportion.left() + proportion.right()) + 0.5f); + return r.my_end = Value(r.my_end - right_part); + } + + template<typename RowValue, typename ColValue> + friend class blocked_range2d; + + template<typename RowValue, typename ColValue, typename PageValue> + friend class blocked_range3d; + + template<typename DimValue, unsigned int N, typename> + friend class blocked_rangeNd_impl; +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_blocked_range_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h index 01ed17d859..1825285961 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h @@ -1,108 +1,108 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_blocked_range2d_H -#define __TBB_blocked_range2d_H - -#include <cstddef> - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -#include "blocked_range.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! A 2-dimensional range that models the Range concept. -/** @ingroup algorithms */ -template<typename RowValue, typename ColValue = RowValue> -class blocked_range2d { -public: - //! Type for size of an iteration range - using row_range_type = blocked_range<RowValue>; - using col_range_type = blocked_range<ColValue>; - -private: - row_range_type my_rows; - col_range_type my_cols; - -public: - blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, - ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : - my_rows(row_begin,row_end,row_grainsize), - my_cols(col_begin,col_end,col_grainsize) - {} - - blocked_range2d( RowValue row_begin, RowValue row_end, - ColValue col_begin, ColValue col_end ) : - my_rows(row_begin,row_end), - my_cols(col_begin,col_end) - {} - - //! True if range is empty - bool empty() const { - // Range is empty if at least one dimension is empty. - return my_rows.empty() || my_cols.empty(); - } - - //! True if range is divisible into two pieces. - bool is_divisible() const { - return my_rows.is_divisible() || my_cols.is_divisible(); - } - - blocked_range2d( blocked_range2d& r, split ) : - my_rows(r.my_rows), - my_cols(r.my_cols) - { - split split_obj; - do_split(r, split_obj); - } - - blocked_range2d( blocked_range2d& r, proportional_split& proportion ) : - my_rows(r.my_rows), - my_cols(r.my_cols) - { - do_split(r, proportion); - } - - //! The rows of the iteration space - const row_range_type& rows() const { return my_rows; } - - //! 
The columns of the iteration space - const col_range_type& cols() const { return my_cols; } - -private: - template <typename Split> - void do_split( blocked_range2d& r, Split& split_obj ) { - if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); - } - } -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::blocked_range2d; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_blocked_range2d_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range2d_H +#define __TBB_blocked_range2d_H + +#include <cstddef> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A 2-dimensional range that models the Range concept. +/** @ingroup algorithms */ +template<typename RowValue, typename ColValue = RowValue> +class blocked_range2d { +public: + //! Type for size of an iteration range + using row_range_type = blocked_range<RowValue>; + using col_range_type = blocked_range<ColValue>; + +private: + row_range_type my_rows; + col_range_type my_cols; + +public: + blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, + ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : + my_rows(row_begin,row_end,row_grainsize), + my_cols(col_begin,col_end,col_grainsize) + {} + + blocked_range2d( RowValue row_begin, RowValue row_end, + ColValue col_begin, ColValue col_end ) : + my_rows(row_begin,row_end), + my_cols(col_begin,col_end) + {} + + //! True if range is empty + bool empty() const { + // Range is empty if at least one dimension is empty. + return my_rows.empty() || my_cols.empty(); + } + + //! True if range is divisible into two pieces. + bool is_divisible() const { + return my_rows.is_divisible() || my_cols.is_divisible(); + } + + blocked_range2d( blocked_range2d& r, split ) : + my_rows(r.my_rows), + my_cols(r.my_cols) + { + split split_obj; + do_split(r, split_obj); + } + + blocked_range2d( blocked_range2d& r, proportional_split& proportion ) : + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, proportion); + } + + //! The rows of the iteration space + const row_range_type& rows() const { return my_rows; } + + //! 
The columns of the iteration space + const col_range_type& cols() const { return my_cols; } + +private: + template <typename Split> + void do_split( blocked_range2d& r, Split& split_obj ) { + if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); + } + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range2d; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_range2d_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h index d4178050a8..4754fa8d3c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h @@ -1,127 +1,127 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_blocked_range3d_H -#define __TBB_blocked_range3d_H - -#include <cstddef> - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -#include "blocked_range.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! A 3-dimensional range that models the Range concept. -/** @ingroup algorithms */ -template<typename PageValue, typename RowValue = PageValue, typename ColValue = RowValue> -class blocked_range3d { -public: - //! Type for size of an iteration range - using page_range_type = blocked_range<PageValue>; - using row_range_type = blocked_range<RowValue>; - using col_range_type = blocked_range<ColValue>; - -private: - page_range_type my_pages; - row_range_type my_rows; - col_range_type my_cols; - -public: - - blocked_range3d( PageValue page_begin, PageValue page_end, - RowValue row_begin, RowValue row_end, - ColValue col_begin, ColValue col_end ) : - my_pages(page_begin,page_end), - my_rows(row_begin,row_end), - my_cols(col_begin,col_end) - {} - - blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, - RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, - ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : - my_pages(page_begin,page_end,page_grainsize), - my_rows(row_begin,row_end,row_grainsize), - my_cols(col_begin,col_end,col_grainsize) - {} - - //! True if range is empty - bool empty() const { - // Range is empty if at least one dimension is empty. - return my_pages.empty() || my_rows.empty() || my_cols.empty(); - } - - //! True if range is divisible into two pieces. 
- bool is_divisible() const { - return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible(); - } - - blocked_range3d( blocked_range3d& r, split split_obj ) : - my_pages(r.my_pages), - my_rows(r.my_rows), - my_cols(r.my_cols) - { - do_split(r, split_obj); - } - - blocked_range3d( blocked_range3d& r, proportional_split& proportion ) : - my_pages(r.my_pages), - my_rows(r.my_rows), - my_cols(r.my_cols) - { - do_split(r, proportion); - } - - //! The pages of the iteration space - const page_range_type& pages() const { return my_pages; } - - //! The rows of the iteration space - const row_range_type& rows() const { return my_rows; } - - //! The columns of the iteration space - const col_range_type& cols() const { return my_cols; } - -private: - template <typename Split> - void do_split( blocked_range3d& r, Split& split_obj) { - if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) { - if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); - } - } else { - if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj); - } - } - } -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::blocked_range3d; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_blocked_range3d_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range3d_H +#define __TBB_blocked_range3d_H + +#include <cstddef> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A 3-dimensional range that models the Range concept. +/** @ingroup algorithms */ +template<typename PageValue, typename RowValue = PageValue, typename ColValue = RowValue> +class blocked_range3d { +public: + //! 
Type for size of an iteration range + using page_range_type = blocked_range<PageValue>; + using row_range_type = blocked_range<RowValue>; + using col_range_type = blocked_range<ColValue>; + +private: + page_range_type my_pages; + row_range_type my_rows; + col_range_type my_cols; + +public: + + blocked_range3d( PageValue page_begin, PageValue page_end, + RowValue row_begin, RowValue row_end, + ColValue col_begin, ColValue col_end ) : + my_pages(page_begin,page_end), + my_rows(row_begin,row_end), + my_cols(col_begin,col_end) + {} + + blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, + RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, + ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : + my_pages(page_begin,page_end,page_grainsize), + my_rows(row_begin,row_end,row_grainsize), + my_cols(col_begin,col_end,col_grainsize) + {} + + //! True if range is empty + bool empty() const { + // Range is empty if at least one dimension is empty. + return my_pages.empty() || my_rows.empty() || my_cols.empty(); + } + + //! True if range is divisible into two pieces. + bool is_divisible() const { + return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible(); + } + + blocked_range3d( blocked_range3d& r, split split_obj ) : + my_pages(r.my_pages), + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, split_obj); + } + + blocked_range3d( blocked_range3d& r, proportional_split& proportion ) : + my_pages(r.my_pages), + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, proportion); + } + + //! The pages of the iteration space + const page_range_type& pages() const { return my_pages; } + + //! The rows of the iteration space + const row_range_type& rows() const { return my_rows; } + + //! The columns of the iteration space + const col_range_type& cols() const { return my_cols; } + +private: + template <typename Split> + void do_split( blocked_range3d& r, Split& split_obj) { + if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) { + if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); + } + } else { + if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj); + } + } + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range3d; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_range3d_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h index 37b71da8fe..9b1531a07a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h @@ -1,144 +1,144 @@ -/* - Copyright (c) 2017-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_blocked_rangeNd_H -#define __TBB_blocked_rangeNd_H - -#if !TBB_PREVIEW_BLOCKED_RANGE_ND - #error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h -#endif - -#include <algorithm> // std::any_of -#include <array> -#include <cstddef> -#include <type_traits> // std::is_same, std::enable_if - -#include "detail/_config.h" -#include "detail/_template_helpers.h" // index_sequence, make_index_sequence - -#include "blocked_range.h" - -namespace tbb { -namespace detail { -namespace d1 { - -/* - The blocked_rangeNd_impl uses make_index_sequence<N> to automatically generate a ctor with - exactly N arguments of the type tbb::blocked_range<Value>. Such ctor provides an opportunity - to use braced-init-list parameters to initialize each dimension. - Use of parameters, whose representation is a braced-init-list, but they're not - std::initializer_list or a reference to one, produces a non-deduced context - within template argument deduction. - - NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl - (and not e.g. a derived class), otherwise it would need to declare its own ctor - facing the same problem that the impl class solves. -*/ - -template<typename Value, unsigned int N, typename = detail::make_index_sequence<N>> -class blocked_rangeNd_impl; - -template<typename Value, unsigned int N, std::size_t... Is> -class blocked_rangeNd_impl<Value, N, detail::index_sequence<Is...>> { -public: - //! Type of a value. - using value_type = Value; - -private: - //! Helper type to construct range with N tbb::blocked_range<value_type> objects. - template<std::size_t> - using dim_type_helper = tbb::blocked_range<value_type>; - -public: - blocked_rangeNd_impl() = delete; - - //! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range<Value>. - blocked_rangeNd_impl(const dim_type_helper<Is>&... args) : my_dims{ {args...} } {} - - //! Dimensionality of a range. - static constexpr unsigned int ndims() { return N; } - - //! Range in certain dimension. - const tbb::blocked_range<value_type>& dim(unsigned int dimension) const { - __TBB_ASSERT(dimension < N, "out of bound"); - return my_dims[dimension]; - } - - //------------------------------------------------------------------------ - // Methods that implement Range concept - //------------------------------------------------------------------------ - - //! True if at least one dimension is empty. - bool empty() const { - return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { - return d.empty(); - }); - } - - //! True if at least one dimension is divisible. 
- bool is_divisible() const { - return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { - return d.is_divisible(); - }); - } - - blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) { - do_split(r, proportion); - } - - blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) { - do_split(r, proportion); - } - -private: - static_assert(N != 0, "zero dimensional blocked_rangeNd can't be constructed"); - - //! Ranges in each dimension. - std::array<tbb::blocked_range<value_type>, N> my_dims; - - template<typename split_type> - void do_split(blocked_rangeNd_impl& r, split_type proportion) { - static_assert((std::is_same<split_type, split>::value || std::is_same<split_type, proportional_split>::value), "type of split object is incorrect"); - __TBB_ASSERT(r.is_divisible(), "can't split not divisible range"); - - auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& first, const tbb::blocked_range<value_type>& second) { - return (first.size() * second.grainsize() < second.size() * first.grainsize()); - }); - - auto r_it = r.my_dims.begin() + (my_it - my_dims.begin()); - - my_it->my_begin = tbb::blocked_range<value_type>::do_split(*r_it, proportion); - - // (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to - // (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept - __TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin), - "blocked_range has been split incorrectly"); - } -}; - -template<typename Value, unsigned int N> -using blocked_rangeNd = blocked_rangeNd_impl<Value, N>; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::blocked_rangeNd; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_blocked_rangeNd_H */ - +/* + Copyright (c) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_rangeNd_H +#define __TBB_blocked_rangeNd_H + +#if !TBB_PREVIEW_BLOCKED_RANGE_ND + #error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h +#endif + +#include <algorithm> // std::any_of +#include <array> +#include <cstddef> +#include <type_traits> // std::is_same, std::enable_if + +#include "detail/_config.h" +#include "detail/_template_helpers.h" // index_sequence, make_index_sequence + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +/* + The blocked_rangeNd_impl uses make_index_sequence<N> to automatically generate a ctor with + exactly N arguments of the type tbb::blocked_range<Value>. Such ctor provides an opportunity + to use braced-init-list parameters to initialize each dimension. + Use of parameters, whose representation is a braced-init-list, but they're not + std::initializer_list or a reference to one, produces a non-deduced context + within template argument deduction. 
+ + NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl + (and not e.g. a derived class), otherwise it would need to declare its own ctor + facing the same problem that the impl class solves. +*/ + +template<typename Value, unsigned int N, typename = detail::make_index_sequence<N>> +class blocked_rangeNd_impl; + +template<typename Value, unsigned int N, std::size_t... Is> +class blocked_rangeNd_impl<Value, N, detail::index_sequence<Is...>> { +public: + //! Type of a value. + using value_type = Value; + +private: + //! Helper type to construct range with N tbb::blocked_range<value_type> objects. + template<std::size_t> + using dim_type_helper = tbb::blocked_range<value_type>; + +public: + blocked_rangeNd_impl() = delete; + + //! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range<Value>. + blocked_rangeNd_impl(const dim_type_helper<Is>&... args) : my_dims{ {args...} } {} + + //! Dimensionality of a range. + static constexpr unsigned int ndims() { return N; } + + //! Range in certain dimension. + const tbb::blocked_range<value_type>& dim(unsigned int dimension) const { + __TBB_ASSERT(dimension < N, "out of bound"); + return my_dims[dimension]; + } + + //------------------------------------------------------------------------ + // Methods that implement Range concept + //------------------------------------------------------------------------ + + //! True if at least one dimension is empty. + bool empty() const { + return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { + return d.empty(); + }); + } + + //! True if at least one dimension is divisible. + bool is_divisible() const { + return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { + return d.is_divisible(); + }); + } + + blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) { + do_split(r, proportion); + } + + blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) { + do_split(r, proportion); + } + +private: + static_assert(N != 0, "zero dimensional blocked_rangeNd can't be constructed"); + + //! Ranges in each dimension. 
+ std::array<tbb::blocked_range<value_type>, N> my_dims; + + template<typename split_type> + void do_split(blocked_rangeNd_impl& r, split_type proportion) { + static_assert((std::is_same<split_type, split>::value || std::is_same<split_type, proportional_split>::value), "type of split object is incorrect"); + __TBB_ASSERT(r.is_divisible(), "can't split not divisible range"); + + auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& first, const tbb::blocked_range<value_type>& second) { + return (first.size() * second.grainsize() < second.size() * first.grainsize()); + }); + + auto r_it = r.my_dims.begin() + (my_it - my_dims.begin()); + + my_it->my_begin = tbb::blocked_range<value_type>::do_split(*r_it, proportion); + + // (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to + // (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept + __TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin), + "blocked_range has been split incorrectly"); + } +}; + +template<typename Value, unsigned int N> +using blocked_rangeNd = blocked_rangeNd_impl<Value, N>; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_rangeNd; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_rangeNd_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h index 645f3fbd2e..dbc4ec1c13 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h @@ -1,189 +1,189 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_cache_aligned_allocator_H -#define __TBB_cache_aligned_allocator_H - -#include "detail/_utils.h" -#include "detail/_namespace_injection.h" -#include <cstdlib> -#include <utility> - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -#error #include <memory_resource> -#endif - -namespace tbb { -namespace detail { - -namespace r1 { -void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); -void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); -std::size_t __TBB_EXPORTED_FUNC cache_line_size(); -} - -namespace d1 { - -template<typename T> -class cache_aligned_allocator { -public: - using value_type = T; - using propagate_on_container_move_assignment = std::true_type; - - //! Always defined for TBB containers (supported since C++17 for std containers) - using is_always_equal = std::true_type; - - cache_aligned_allocator() = default; - template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) noexcept {} - - //! Allocate space for n objects, starting on a cache/sector line. - __TBB_nodiscard T* allocate(std::size_t n) { - return static_cast<T*>(r1::cache_aligned_allocate(n * sizeof(value_type))); - } - - //! 
Free block of memory that starts on a cache line - void deallocate(T* p, std::size_t) { - r1::cache_aligned_deallocate(p); - } - - //! Largest value for which method allocate might succeed. - std::size_t max_size() const noexcept { - return (~std::size_t(0) - r1::cache_line_size()) / sizeof(value_type); - } - -#if TBB_ALLOCATOR_TRAITS_BROKEN - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using difference_type = std::ptrdiff_t; - using size_type = std::size_t; - template<typename U> struct rebind { - using other = cache_aligned_allocator<U>; - }; - template<typename U, typename... Args> - void construct(U *p, Args&&... args) - { ::new (p) U(std::forward<Args>(args)...); } - void destroy(pointer p) { p->~value_type(); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } -#endif // TBB_ALLOCATOR_TRAITS_BROKEN -}; - -#if TBB_ALLOCATOR_TRAITS_BROKEN - template<> - class cache_aligned_allocator<void> { - public: - using pointer = void*; - using const_pointer = const void*; - using value_type = void; - template<typename U> struct rebind { - using other = cache_aligned_allocator<U>; - }; - }; -#endif - -template<typename T, typename U> -bool operator==(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return true; } - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template<typename T, typename U> -bool operator!=(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return false; } -#endif - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT - -//! C++17 memory resource wrapper to ensure cache line size alignment -class cache_aligned_resource : public std::pmr::memory_resource { -public: - cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {} - explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {} - - std::pmr::memory_resource* upstream_resource() const { - return m_upstream; - } - -private: - //! We don't know what memory resource set. Use padding to guarantee alignment - void* do_allocate(std::size_t bytes, std::size_t alignment) override { - // TODO: make it common with tbb_allocator.cpp - std::size_t cache_line_alignment = correct_alignment(alignment); - std::size_t space = correct_size(bytes) + cache_line_alignment; - std::uintptr_t base = reinterpret_cast<std::uintptr_t>(m_upstream->allocate(space)); - __TBB_ASSERT(base != 0, "Upstream resource returned NULL."); - - // Round up to the next cache line (align the base address) - std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1); - __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Can`t store a base pointer to the header"); - __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); - - // Record where block actually starts. 
- (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; - return reinterpret_cast<void*>(result); - } - - void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) override { - if (ptr) { - // Recover where block actually starts - std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(ptr))[-1]; - m_upstream->deallocate(reinterpret_cast<void*>(base), correct_size(bytes) + correct_alignment(alignment)); - } - } - - bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { - if (this == &other) { return true; } -#if __TBB_USE_OPTIONAL_RTTI - const cache_aligned_resource* other_res = dynamic_cast<const cache_aligned_resource*>(&other); - return other_res && (upstream_resource() == other_res->upstream_resource()); -#else - return false; -#endif - } - - std::size_t correct_alignment(std::size_t alignment) { - __TBB_ASSERT(tbb::detail::is_power_of_two(alignment), "Alignment is not a power of 2"); -#if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT - std::size_t cache_line_size = std::hardware_destructive_interference_size; -#else - std::size_t cache_line_size = r1::cache_line_size(); -#endif - return alignment < cache_line_size ? cache_line_size : alignment; - } - - std::size_t correct_size(std::size_t bytes) { - // To handle the case, when small size requested. There could be not - // enough space to store the original pointer. - return bytes < sizeof(std::uintptr_t) ? sizeof(std::uintptr_t) : bytes; - } - - std::pmr::memory_resource* m_upstream; -}; - -#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::cache_aligned_allocator; -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -using detail::d1::cache_aligned_resource; -#endif -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_cache_aligned_allocator_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_cache_aligned_allocator_H +#define __TBB_cache_aligned_allocator_H + +#include "detail/_utils.h" +#include "detail/_namespace_injection.h" +#include <cstdlib> +#include <utility> + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); +std::size_t __TBB_EXPORTED_FUNC cache_line_size(); +} + +namespace d1 { + +template<typename T> +class cache_aligned_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers (supported since C++17 for std containers) + using is_always_equal = std::true_type; + + cache_aligned_allocator() = default; + template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) noexcept {} + + //! Allocate space for n objects, starting on a cache/sector line. 
+ __TBB_nodiscard T* allocate(std::size_t n) { + return static_cast<T*>(r1::cache_aligned_allocate(n * sizeof(value_type))); + } + + //! Free block of memory that starts on a cache line + void deallocate(T* p, std::size_t) { + r1::cache_aligned_deallocate(p); + } + + //! Largest value for which method allocate might succeed. + std::size_t max_size() const noexcept { + return (~std::size_t(0) - r1::cache_line_size()) / sizeof(value_type); + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = cache_aligned_allocator<U>; + }; + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new (p) U(std::forward<Args>(args)...); } + void destroy(pointer p) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class cache_aligned_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = cache_aligned_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +bool operator==(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +bool operator!=(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return false; } +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +//! C++17 memory resource wrapper to ensure cache line size alignment +class cache_aligned_resource : public std::pmr::memory_resource { +public: + cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {} + explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {} + + std::pmr::memory_resource* upstream_resource() const { + return m_upstream; + } + +private: + //! We don't know what memory resource set. Use padding to guarantee alignment + void* do_allocate(std::size_t bytes, std::size_t alignment) override { + // TODO: make it common with tbb_allocator.cpp + std::size_t cache_line_alignment = correct_alignment(alignment); + std::size_t space = correct_size(bytes) + cache_line_alignment; + std::uintptr_t base = reinterpret_cast<std::uintptr_t>(m_upstream->allocate(space)); + __TBB_ASSERT(base != 0, "Upstream resource returned NULL."); + + // Round up to the next cache line (align the base address) + std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1); + __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Can`t store a base pointer to the header"); + __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); + + // Record where block actually starts. 
+ (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; + return reinterpret_cast<void*>(result); + } + + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) override { + if (ptr) { + // Recover where block actually starts + std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(ptr))[-1]; + m_upstream->deallocate(reinterpret_cast<void*>(base), correct_size(bytes) + correct_alignment(alignment)); + } + } + + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + if (this == &other) { return true; } +#if __TBB_USE_OPTIONAL_RTTI + const cache_aligned_resource* other_res = dynamic_cast<const cache_aligned_resource*>(&other); + return other_res && (upstream_resource() == other_res->upstream_resource()); +#else + return false; +#endif + } + + std::size_t correct_alignment(std::size_t alignment) { + __TBB_ASSERT(tbb::detail::is_power_of_two(alignment), "Alignment is not a power of 2"); +#if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT + std::size_t cache_line_size = std::hardware_destructive_interference_size; +#else + std::size_t cache_line_size = r1::cache_line_size(); +#endif + return alignment < cache_line_size ? cache_line_size : alignment; + } + + std::size_t correct_size(std::size_t bytes) { + // To handle the case, when small size requested. There could be not + // enough space to store the original pointer. + return bytes < sizeof(std::uintptr_t) ? sizeof(std::uintptr_t) : bytes; + } + + std::pmr::memory_resource* m_upstream; +}; + +#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::cache_aligned_allocator; +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +using detail::d1::cache_aligned_resource; +#endif +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_cache_aligned_allocator_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/combinable.h b/contrib/libs/tbb/include/oneapi/tbb/combinable.h index b676a30cc0..e211c970aa 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/combinable.h +++ b/contrib/libs/tbb/include/oneapi/tbb/combinable.h @@ -1,69 +1,69 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_combinable_H -#define __TBB_combinable_H - -#include "detail/_namespace_injection.h" - -#include "enumerable_thread_specific.h" -#include "cache_aligned_allocator.h" - -namespace tbb { -namespace detail { -namespace d1 { -/** \name combinable **/ -//@{ -//! 
Thread-local storage with optional reduction -/** @ingroup containers */ -template <typename T> -class combinable { - using my_alloc = typename tbb::cache_aligned_allocator<T>; - using my_ets_type = typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key>; - my_ets_type my_ets; - -public: - combinable() = default; - - template <typename Finit> - explicit combinable(Finit _finit) : my_ets(_finit) { } - - void clear() { my_ets.clear(); } - - T& local() { return my_ets.local(); } - - T& local(bool& exists) { return my_ets.local(exists); } - - // combine_func_t has signature T(T,T) or T(const T&, const T&) - template <typename CombineFunc> - T combine(CombineFunc f_combine) { return my_ets.combine(f_combine); } - - // combine_func_t has signature void(T) or void(const T&) - template <typename CombineFunc> - void combine_each(CombineFunc f_combine) { my_ets.combine_each(f_combine); } -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::combinable; -} // inline namespace v1 - -} // namespace tbb - -#endif /* __TBB_combinable_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_combinable_H +#define __TBB_combinable_H + +#include "detail/_namespace_injection.h" + +#include "enumerable_thread_specific.h" +#include "cache_aligned_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { +/** \name combinable **/ +//@{ +//! Thread-local storage with optional reduction +/** @ingroup containers */ +template <typename T> +class combinable { + using my_alloc = typename tbb::cache_aligned_allocator<T>; + using my_ets_type = typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key>; + my_ets_type my_ets; + +public: + combinable() = default; + + template <typename Finit> + explicit combinable(Finit _finit) : my_ets(_finit) { } + + void clear() { my_ets.clear(); } + + T& local() { return my_ets.local(); } + + T& local(bool& exists) { return my_ets.local(exists); } + + // combine_func_t has signature T(T,T) or T(const T&, const T&) + template <typename CombineFunc> + T combine(CombineFunc f_combine) { return my_ets.combine(f_combine); } + + // combine_func_t has signature void(T) or void(const T&) + template <typename CombineFunc> + void combine_each(CombineFunc f_combine) { my_ets.combine_each(f_combine); } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::combinable; +} // inline namespace v1 + +} // namespace tbb + +#endif /* __TBB_combinable_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h index 510557e9f2..1019e2fd3c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h @@ -1,1524 +1,1524 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_concurrent_hash_map_H -#define __TBB_concurrent_hash_map_H - -#include "detail/_namespace_injection.h" -#include "detail/_utils.h" -#include "detail/_assert.h" -#include "detail/_allocator_traits.h" -#include "detail/_containers_helpers.h" -#include "detail/_template_helpers.h" -#include "detail/_hash_compare.h" -#include "detail/_range_common.h" -#include "tbb_allocator.h" -#include "spin_rw_mutex.h" - -#include <atomic> -#include <initializer_list> -#include <tuple> -#include <iterator> -#include <utility> // Need std::pair -#include <cstring> // Need std::memset - -namespace tbb { -namespace detail { -namespace d1 { - -struct hash_map_node_base : no_copy { - using mutex_type = spin_rw_mutex; - // Scoped lock type for mutex - using scoped_type = mutex_type::scoped_lock; - // Next node in chain - hash_map_node_base* next; - mutex_type mutex; -}; - -// Incompleteness flag value -static hash_map_node_base* const rehash_req = reinterpret_cast<hash_map_node_base*>(std::size_t(3)); -// Rehashed empty bucket flag -static hash_map_node_base* const empty_rehashed = reinterpret_cast<hash_map_node_base*>(std::size_t(0)); - -// base class of concurrent_hash_map - -template <typename Allocator> -class hash_map_base { -public: - using size_type = std::size_t; - using hashcode_type = std::size_t; - using segment_index_type = std::size_t; - using node_base = hash_map_node_base; - - struct bucket : no_copy { - using mutex_type = spin_rw_mutex; - using scoped_type = mutex_type::scoped_lock; - - bucket() : node_list(nullptr) {} - bucket( node_base* ptr ) : node_list(ptr) {} - - mutex_type mutex; - std::atomic<node_base*> node_list; - }; - - using allocator_type = Allocator; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - using bucket_allocator_type = typename allocator_traits_type::template rebind_alloc<bucket>; - using bucket_allocator_traits = tbb::detail::allocator_traits<bucket_allocator_type>; - - // Count of segments in the first block - static constexpr size_type embedded_block = 1; - // Count of segments in the first block - static constexpr size_type embedded_buckets = 1 << embedded_block; - // Count of segments in the first block - static constexpr size_type first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096 - // Size of a pointer / table size - static constexpr size_type pointers_per_table = sizeof(segment_index_type) * 8; // one segment per bit - - using segment_ptr_type = bucket*; - using atomic_segment_type = std::atomic<segment_ptr_type>; - using segments_table_type = atomic_segment_type[pointers_per_table]; - - hash_map_base( const allocator_type& alloc ) : my_allocator(alloc), my_mask(embedded_buckets - 1), my_size(0) { - for (size_type i = 0; i != embedded_buckets; ++i) { - my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); - } - - for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) { - auto argument = segment_index < embedded_block ? 
my_embedded_segment + segment_base(segment_index) : nullptr; - my_table[segment_index].store(argument, std::memory_order_relaxed); - } - - __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); - } - - // segment index of given index in the array - static segment_index_type segment_index_of( size_type index ) { - return segment_index_type(tbb::detail::log2( index|1 )); - } - - // the first array index of given segment - static segment_index_type segment_base( segment_index_type k ) { - return (segment_index_type(1) << k & ~segment_index_type(1)); - } - - // segment size except for k == 0 - static size_type segment_size( segment_index_type k ) { - return size_type(1) << k; // fake value for k==0 - } - - // true if ptr is valid pointer - static bool is_valid( void* ptr ) { - return reinterpret_cast<uintptr_t>(ptr) > uintptr_t(63); - } - - template <typename... Args> - void init_buckets_impl( segment_ptr_type ptr, size_type sz, Args&&... args ) { - for (size_type i = 0; i < sz; ++i) { - bucket_allocator_traits::construct(my_allocator, ptr + i, std::forward<Args>(args)...); - } - } - - // Initialize buckets - void init_buckets( segment_ptr_type ptr, size_type sz, bool is_initial ) { - if (is_initial) { - init_buckets_impl(ptr, sz); - } else { - init_buckets_impl(ptr, sz, reinterpret_cast<node_base*>(rehash_req)); - } - } - - // Add node n to bucket b - static void add_to_bucket( bucket* b, node_base* n ) { - __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); - n->next = b->node_list.load(std::memory_order_relaxed); - b->node_list.store(n, std::memory_order_relaxed); // its under lock and flag is set - } - - const bucket_allocator_type& get_allocator() const { - return my_allocator; - } - - bucket_allocator_type& get_allocator() { - return my_allocator; - } - - // Enable segment - void enable_segment( segment_index_type k, bool is_initial = false ) { - __TBB_ASSERT( k, "Zero segment must be embedded" ); - size_type sz; - __TBB_ASSERT( !is_valid(my_table[k].load(std::memory_order_relaxed)), "Wrong concurrent assignment"); - if (k >= first_block) { - sz = segment_size(k); - segment_ptr_type ptr = nullptr; - try_call( [&] { - ptr = bucket_allocator_traits::allocate(my_allocator, sz); - } ).on_exception( [&] { - my_table[k].store(nullptr, std::memory_order_relaxed); - }); - - __TBB_ASSERT(ptr, nullptr); - init_buckets(ptr, sz, is_initial); - my_table[k].store(ptr, std::memory_order_release); - sz <<= 1;// double it to get entire capacity of the container - } else { // the first block - __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); - sz = segment_size(first_block); - segment_ptr_type ptr = nullptr; - try_call( [&] { - ptr = bucket_allocator_traits::allocate(my_allocator, sz - embedded_buckets); - } ).on_exception( [&] { - my_table[k].store(nullptr, std::memory_order_relaxed); - }); - - __TBB_ASSERT(ptr, nullptr); - init_buckets(ptr, sz - embedded_buckets, is_initial); - ptr -= segment_base(embedded_block); - for(segment_index_type i = embedded_block; i < first_block; i++) // calc the offsets - my_table[i].store(ptr + segment_base(i), std::memory_order_release); - } - my_mask.store(sz-1, std::memory_order_release); - } - - void delete_segment( segment_index_type s ) { - segment_ptr_type buckets_ptr = my_table[s].load(std::memory_order_relaxed); - size_type sz = segment_size( s ? 
s : 1 ); - - size_type deallocate_size = 0; - - if (s >= first_block) { // the first segment or the next - deallocate_size = sz; - } else if (s == embedded_block && embedded_block != first_block) { - deallocate_size = segment_size(first_block) - embedded_buckets; - } - - for (size_type i = 0; i < deallocate_size; ++i) { - bucket_allocator_traits::destroy(my_allocator, buckets_ptr + i); - } - if (deallocate_size != 0) { - bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, deallocate_size); - } - - if (s >= embedded_block) my_table[s].store(nullptr, std::memory_order_relaxed); - } - - // Get bucket by (masked) hashcode - bucket *get_bucket( hashcode_type h ) const noexcept { - segment_index_type s = segment_index_of( h ); - h -= segment_base(s); - segment_ptr_type seg = my_table[s].load(std::memory_order_acquire); - __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); - return &seg[h]; - } - - // detail serial rehashing helper - void mark_rehashed_levels( hashcode_type h ) noexcept { - segment_index_type s = segment_index_of( h ); - while (segment_ptr_type seg = my_table[++s].load(std::memory_order_relaxed)) - if( seg[h].node_list.load(std::memory_order_relaxed) == rehash_req ) { - seg[h].node_list.store(empty_rehashed, std::memory_order_relaxed); - mark_rehashed_levels( h + ((hashcode_type)1<<s) ); // optimized segment_base(s) - } - } - - // Check for mask race - // Splitting into two functions should help inlining - inline bool check_mask_race( const hashcode_type h, hashcode_type &m ) const { - hashcode_type m_now, m_old = m; - m_now = my_mask.load(std::memory_order_acquire); - if (m_old != m_now) { - return check_rehashing_collision(h, m_old, m = m_now); - } - return false; - } - - // Process mask race, check for rehashing collision - bool check_rehashing_collision( const hashcode_type h, hashcode_type m_old, hashcode_type m ) const { - __TBB_ASSERT(m_old != m, nullptr); // TODO?: m arg could be optimized out by passing h = h&m - if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event - // condition above proves that 'h' has some other bits set beside 'm_old' - // find next applicable mask after m_old //TODO: look at bsl instruction - for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size - ; - m_old = (m_old<<1) - 1; // get full mask from a bit - __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, nullptr); - // check whether it is rehashing/ed - if( get_bucket(h & m_old)->node_list.load(std::memory_order_acquire) != rehash_req ) { - return true; - } - } - return false; - } - - // Insert a node and check for load factor. @return segment index to enable. 
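    // Editorial note (not from the upstream header) on the function below: it returns a non-zero
    // segment index only when the load-factor check fires (incremented size >= mask) and this
    // thread wins the compare_exchange on the next segment slot; the caller then calls
    // enable_segment() after releasing the bucket lock, which grows the bucket count
    // (doubling it once the first block has been allocated).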
- segment_index_type insert_new_node( bucket *b, node_base *n, hashcode_type mask ) { - size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted - add_to_bucket( b, n ); - // check load factor - if( sz >= mask ) { // TODO: add custom load_factor - segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of - __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated"); - static const segment_ptr_type is_allocating = segment_ptr_type(2);; - segment_ptr_type disabled = nullptr; - if (!(my_table[new_seg].load(std::memory_order_acquire)) - && my_table[new_seg].compare_exchange_strong(disabled, is_allocating)) - return new_seg; // The value must be processed - } - return 0; - } - - // Prepare enough segments for number of buckets - void reserve(size_type buckets) { - if( !buckets-- ) return; - bool is_initial = !my_size.load(std::memory_order_relaxed); - for (size_type m = my_mask.load(std::memory_order_relaxed); buckets > m; - m = my_mask.load(std::memory_order_relaxed)) - { - enable_segment( segment_index_of( m+1 ), is_initial ); - } - } - - // Swap hash_map_bases - void internal_swap_content(hash_map_base &table) { - using std::swap; - swap_atomics_relaxed(my_mask, table.my_mask); - swap_atomics_relaxed(my_size, table.my_size); - - for(size_type i = 0; i < embedded_buckets; i++) { - auto temp = my_embedded_segment[i].node_list.load(std::memory_order_relaxed); - my_embedded_segment[i].node_list.store(table.my_embedded_segment[i].node_list.load(std::memory_order_relaxed), - std::memory_order_relaxed); - table.my_embedded_segment[i].node_list.store(temp, std::memory_order_relaxed); - } - for(size_type i = embedded_block; i < pointers_per_table; i++) { - auto temp = my_table[i].load(std::memory_order_relaxed); - my_table[i].store(table.my_table[i].load(std::memory_order_relaxed), - std::memory_order_relaxed); - table.my_table[i].store(temp, std::memory_order_relaxed); - } - } - - void internal_move(hash_map_base&& other) { - my_mask.store(other.my_mask.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_mask.store(embedded_buckets - 1, std::memory_order_relaxed); - - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_size.store(0, std::memory_order_relaxed); - - for (size_type i = 0; i < embedded_buckets; ++i) { - my_embedded_segment[i].node_list.store(other.my_embedded_segment[i].node_list, std::memory_order_relaxed); - other.my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); - } - - for (size_type i = embedded_block; i < pointers_per_table; ++i) { - my_table[i].store(other.my_table[i].load(std::memory_order_relaxed), - std::memory_order_relaxed); - other.my_table[i].store(nullptr, std::memory_order_relaxed); - } - } - -protected: - - bucket_allocator_type my_allocator; - // Hash mask = sum of allocated segment sizes - 1 - std::atomic<hashcode_type> my_mask; - // Size of container in stored items - std::atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects - // Zero segment - bucket my_embedded_segment[embedded_buckets]; - // Segment pointers table. 
Also prevents false sharing between my_mask and my_size - segments_table_type my_table; -}; - -template <typename Iterator> -class hash_map_range; - -// Meets requirements of a forward iterator for STL -// Value is either the T or const T type of the container. -template <typename Container, typename Value> -class hash_map_iterator { - using map_type = Container; - using node = typename Container::node; - using map_base = typename Container::base_type; - using node_base = typename map_base::node_base; - using bucket = typename map_base::bucket; -public: - using value_type = Value; - using size_type = typename Container::size_type; - using difference_type = typename Container::difference_type; - using pointer = value_type*; - using reference = value_type&; - using iterator_category = std::forward_iterator_tag; - - // Construct undefined iterator - hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {} - hash_map_iterator( const hash_map_iterator<Container, typename Container::value_type>& other ) : - my_map(other.my_map), - my_index(other.my_index), - my_bucket(other.my_bucket), - my_node(other.my_node) - {} - - hash_map_iterator& operator=( const hash_map_iterator<Container, typename Container::value_type>& other ) { - my_map = other.my_map; - my_index = other.my_index; - my_bucket = other.my_bucket; - my_node = other.my_node; - return *this; - } - - Value& operator*() const { - __TBB_ASSERT( map_base::is_valid(my_node), "iterator uninitialized or at end of container?" ); - return my_node->value(); - } - - Value* operator->() const {return &operator*();} - - hash_map_iterator& operator++() { - my_node = static_cast<node*>( my_node->next ); - if( !my_node ) advance_to_next_bucket(); - return *this; - } - - // Post increment - hash_map_iterator operator++(int) { - hash_map_iterator old(*this); - operator++(); - return old; - } -private: - template <typename C, typename T, typename U> - friend bool operator==( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); - - template <typename C, typename T, typename U> - friend bool operator!=( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); - - template <typename C, typename T, typename U> - friend ptrdiff_t operator-( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); - - template <typename C, typename U> - friend class hash_map_iterator; - - template <typename I> - friend class hash_map_range; - - void advance_to_next_bucket() { // TODO?: refactor to iterator_base class - size_t k = my_index+1; - __TBB_ASSERT( my_bucket, "advancing an invalid iterator?"); - while (k <= my_map->my_mask.load(std::memory_order_relaxed)) { - // Following test uses 2's-complement wizardry - if( k&(k-2) ) // not the beginning of a segment - ++my_bucket; - else my_bucket = my_map->get_bucket( k ); - my_node = static_cast<node*>( my_bucket->node_list.load(std::memory_order_relaxed) ); - if( map_base::is_valid(my_node) ) { - my_index = k; return; - } - ++k; - } - my_bucket = 0; my_node = 0; my_index = k; // the end - } - - template <typename Key, typename T, typename HashCompare, typename A> - friend class concurrent_hash_map; - - hash_map_iterator( const Container &map, std::size_t index, const bucket *b, node_base *n ) : - my_map(&map), my_index(index), my_bucket(b), my_node(static_cast<node*>(n)) - { - if( b && !map_base::is_valid(n) ) - advance_to_next_bucket(); - } - - // concurrent_hash_map over which we are iterating. 
- const Container *my_map; - // Index in hash table for current item - size_t my_index; - // Pointer to bucket - const bucket* my_bucket; - // Pointer to node that has current item - node* my_node; -}; - -template <typename Container, typename T, typename U> -bool operator==( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { - return i.my_node == j.my_node && i.my_map == j.my_map; -} - -template <typename Container, typename T, typename U> -bool operator!=( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { - return i.my_node != j.my_node || i.my_map != j.my_map; -} - -// Range class used with concurrent_hash_map -template <typename Iterator> -class hash_map_range { - using map_type = typename Iterator::map_type; -public: - // Type for size of a range - using size_type = std::size_t; - using value_type = typename Iterator::value_type; - using reference = typename Iterator::reference; - using difference_type = typename Iterator::difference_type; - using iterator = Iterator; - - // True if range is empty. - bool empty() const {return my_begin == my_end;} - - // True if range can be partitioned into two subranges. - bool is_divisible() const { - return my_midpoint != my_end; - } - - // Split range. - hash_map_range( hash_map_range& r, split ) : - my_end(r.my_end), - my_grainsize(r.my_grainsize) - { - r.my_end = my_begin = r.my_midpoint; - __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); - __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); - set_midpoint(); - r.set_midpoint(); - } - - // Init range with container and grainsize specified - hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : - my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list.load(std::memory_order_relaxed) ) ), - my_end( Iterator( map, map.my_mask.load(std::memory_order_relaxed) + 1, 0, 0 ) ), - my_grainsize( grainsize_ ) - { - __TBB_ASSERT( grainsize_>0, "grainsize must be positive" ); - set_midpoint(); - } - - const Iterator begin() const { return my_begin; } - const Iterator end() const { return my_end; } - // The grain size for this range. - size_type grainsize() const { return my_grainsize; } - -private: - Iterator my_begin; - Iterator my_end; - mutable Iterator my_midpoint; - size_t my_grainsize; - // Set my_midpoint to point approximately half way between my_begin and my_end. 
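    // Editorial note (not from the upstream header): the split is by bucket index, not by element
    // count, so each half of a divided hash_map_range covers roughly half of the remaining
    // buckets, and grainsize is likewise measured in buckets.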
- void set_midpoint() const; - template <typename U> friend class hash_map_range; -}; - -template <typename Iterator> -void hash_map_range<Iterator>::set_midpoint() const { - // Split by groups of nodes - size_t m = my_end.my_index-my_begin.my_index; - if( m > my_grainsize ) { - m = my_begin.my_index + m/2u; - auto b = my_begin.my_map->get_bucket(m); - my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list.load(std::memory_order_relaxed)); - } else { - my_midpoint = my_end; - } - __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index, - "my_begin is after my_midpoint" ); - __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index, - "my_midpoint is after my_end" ); - __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end, - "[my_begin, my_midpoint) range should not be empty" ); -} - -template <typename Key, typename T, - typename HashCompare = tbb_hash_compare<Key>, - typename Allocator = tbb_allocator<std::pair<const Key, T>>> -class concurrent_hash_map : protected hash_map_base<Allocator> { - template <typename Container, typename Value> - friend class hash_map_iterator; - - template <typename I> - friend class hash_map_range; - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; -public: - using base_type = hash_map_base<Allocator>; - using key_type = Key; - using mapped_type = T; - // type_identity is needed to disable implicit deduction guides for std::initializer_list constructors - // and copy/move constructor with explicit allocator argument - using allocator_type = tbb::detail::type_identity_t<Allocator>; - using hash_compare_type = tbb::detail::type_identity_t<HashCompare>; - using value_type = std::pair<const Key, T>; - using size_type = typename base_type::size_type; - using difference_type = std::ptrdiff_t; - - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using reference = value_type&; - using const_reference = const value_type&; - using iterator = hash_map_iterator<concurrent_hash_map, value_type>; - using const_iterator = hash_map_iterator<concurrent_hash_map, const value_type>; - using range_type = hash_map_range<iterator>; - using const_range_type = hash_map_range<const_iterator>; - -protected: - static_assert(std::is_same<value_type, typename Allocator::value_type>::value, - "value_type of the container must be the same as its allocator's"); - - friend class const_accessor; - class node; - using segment_index_type = typename base_type::segment_index_type; - using segment_ptr_type = typename base_type::segment_ptr_type; - using node_base = typename base_type::node_base; - using bucket = typename base_type::bucket; - using hashcode_type = typename base_type::hashcode_type; - using bucket_allocator_type = typename base_type::bucket_allocator_type; - using node_allocator_type = typename base_type::allocator_traits_type::template rebind_alloc<node>; - using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; - hash_compare_type my_hash_compare; - - class node : public node_base { - public: - node() {} - ~node() {} - pointer storage() { return &my_value; } - value_type& value() { return *storage(); } - private: - union { - value_type my_value; - }; - }; - - void delete_node( node_base *n ) { - node_allocator_type node_allocator(this->get_allocator()); - node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)->storage()); - node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)); - 
node_allocator_traits::deallocate(node_allocator, static_cast<node*>(n), 1); - } - - template <typename... Args> - static node* create_node(bucket_allocator_type& allocator, Args&&... args) { - node_allocator_type node_allocator(allocator); - node* node_ptr = node_allocator_traits::allocate(node_allocator, 1); - auto guard = make_raii_guard([&] { - node_allocator_traits::destroy(node_allocator, node_ptr); - node_allocator_traits::deallocate(node_allocator, node_ptr, 1); - }); - - node_allocator_traits::construct(node_allocator, node_ptr); - node_allocator_traits::construct(node_allocator, node_ptr->storage(), std::forward<Args>(args)...); - guard.dismiss(); - return node_ptr; - } - - static node* allocate_node_copy_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ - return create_node(allocator, key, *t); - } - - static node* allocate_node_move_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ - return create_node(allocator, key, std::move(*const_cast<T*>(t))); - } - - static node* allocate_node_default_construct(bucket_allocator_type& allocator, const Key &key, const T * ){ - // Emplace construct an empty T object inside the pair - return create_node(allocator, std::piecewise_construct, - std::forward_as_tuple(key), std::forward_as_tuple()); - } - - static node* do_not_allocate_node(bucket_allocator_type& , const Key &, const T * ){ - __TBB_ASSERT(false,"this dummy function should not be called"); - return nullptr; - } - - node *search_bucket( const key_type &key, bucket *b ) const { - node *n = static_cast<node*>( b->node_list.load(std::memory_order_relaxed) ); - while (this->is_valid(n) && !my_hash_compare.equal(key, n->value().first)) - n = static_cast<node*>( n->next ); - __TBB_ASSERT(n != rehash_req, "Search can be executed only for rehashed bucket"); - return n; - } - - // bucket accessor is to find, rehash, acquire a lock, and access a bucket - class bucket_accessor : public bucket::scoped_type { - bucket *my_b; - public: - bucket_accessor( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { acquire( base, h, writer ); } - // find a bucket by masked hashcode, optionally rehash, and acquire the lock - inline void acquire( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { - my_b = base->get_bucket( h ); - // TODO: actually, notification is unnecessary here, just hiding double-check - if( my_b->node_list.load(std::memory_order_acquire) == rehash_req - && bucket::scoped_type::try_acquire( my_b->mutex, /*write=*/true ) ) - { - if( my_b->node_list.load(std::memory_order_relaxed) == rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing - } - else bucket::scoped_type::acquire( my_b->mutex, writer ); - __TBB_ASSERT( my_b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); - } - // check whether bucket is locked for write - bool is_writer() { return bucket::scoped_type::m_is_writer; } - // get bucket pointer - bucket *operator() () { return my_b; } - }; - - // TODO refactor to hash_base - void rehash_bucket( bucket *b_new, const hashcode_type hash ) { - __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); - __TBB_ASSERT( hash > 1, "The lowermost buckets can't be rehashed" ); - b_new->node_list.store(empty_rehashed, std::memory_order_release); // mark rehashed - hashcode_type mask = (1u << tbb::detail::log2(hash)) - 1; // get parent mask from the topmost bit - bucket_accessor b_old( this, hash & mask ); - - mask = (mask<<1) | 1; // get full mask 
for new bucket - __TBB_ASSERT( (mask&(mask+1))==0 && (hash & mask) == hash, nullptr ); - restart: - node_base* prev = nullptr; - node_base* curr = b_old()->node_list.load(std::memory_order_acquire); - while (this->is_valid(curr)) { - hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); - - if ((curr_node_hash & mask) == hash) { - if (!b_old.is_writer()) { - if (!b_old.upgrade_to_writer()) { - goto restart; // node ptr can be invalid due to concurrent erase - } - } - node_base* next = curr->next; - // exclude from b_old - if (prev == nullptr) { - b_old()->node_list.store(curr->next, std::memory_order_relaxed); - } else { - prev->next = curr->next; - } - this->add_to_bucket(b_new, curr); - curr = next; - } else { - prev = curr; - curr = curr->next; - } - } - } - -public: - - class accessor; - // Combines data access, locking, and garbage collection. - class const_accessor : private node::scoped_type /*which derived from no_copy*/ { - friend class concurrent_hash_map<Key,T,HashCompare,Allocator>; - friend class accessor; - public: - // Type of value - using value_type = const typename concurrent_hash_map::value_type; - - // True if result is empty. - bool empty() const { return !my_node; } - - // Set to null - void release() { - if( my_node ) { - node::scoped_type::release(); - my_node = 0; - } - } - - // Return reference to associated value in hash table. - const_reference operator*() const { - __TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); - return my_node->value(); - } - - // Return pointer to associated value in hash table. - const_pointer operator->() const { - return &operator*(); - } - - // Create empty result - const_accessor() : my_node(nullptr) {} - - // Destroy result after releasing the underlying reference. - ~const_accessor() { - my_node = nullptr; // scoped lock's release() is called in its destructor - } - protected: - bool is_writer() { return node::scoped_type::m_is_writer; } - node *my_node; - hashcode_type my_hash; - }; - - // Allows write access to elements and combines data access, locking, and garbage collection. - class accessor: public const_accessor { - public: - // Type of value - using value_type = typename concurrent_hash_map::value_type; - - // Return reference to associated value in hash table. - reference operator*() const { - __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" ); - return this->my_node->value(); - } - - // Return pointer to associated value in hash table. - pointer operator->() const { - return &operator*(); - } - }; - - explicit concurrent_hash_map( const hash_compare_type& compare, const allocator_type& a = allocator_type() ) - : base_type(a) - , my_hash_compare(compare) - {} - - concurrent_hash_map() : concurrent_hash_map(hash_compare_type()) {} - - explicit concurrent_hash_map( const allocator_type& a ) - : concurrent_hash_map(hash_compare_type(), a) - {} - - // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. 
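    // Editorial illustration (not from the upstream header): concurrent_hash_map<int, std::string> m(1000);
    // ends up with bucket_count() >= 1000, because the constructor below forwards n to reserve().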
- concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() ) - : concurrent_hash_map(a) - { - this->reserve(n); - } - - concurrent_hash_map( size_type n, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) - : concurrent_hash_map(compare, a) - { - this->reserve(n); - } - - // Copy constructor - concurrent_hash_map( const concurrent_hash_map &table ) - : concurrent_hash_map(node_allocator_traits::select_on_container_copy_construction(table.get_allocator())) - { - try_call( [&] { - internal_copy(table); - }).on_exception( [&] { - this->clear(); - }); - } - - concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a) - : concurrent_hash_map(a) - { - try_call( [&] { - internal_copy(table); - }).on_exception( [&] { - this->clear(); - }); - } - - // Move constructor - concurrent_hash_map( concurrent_hash_map &&table ) - : concurrent_hash_map(std::move(table.get_allocator())) - { - this->internal_move(std::move(table)); - } - - // Move constructor - concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a ) - : concurrent_hash_map(a) - { - using is_equal_type = typename node_allocator_traits::is_always_equal; - internal_move_construct_with_allocator(std::move(table), a, is_equal_type()); - } - - // Construction with copying iteration range and given allocator instance - template <typename I> - concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() ) - : concurrent_hash_map(a) - { - try_call( [&] { - internal_copy(first, last, std::distance(first, last)); - }).on_exception( [&] { - this->clear(); - }); - } - - template <typename I> - concurrent_hash_map( I first, I last, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) - : concurrent_hash_map(compare, a) - { - try_call( [&] { - internal_copy(first, last, std::distance(first, last)); - }).on_exception( [&] { - this->clear(); - }); - } - - concurrent_hash_map( std::initializer_list<value_type> il, const hash_compare_type& compare = hash_compare_type(), const allocator_type& a = allocator_type() ) - : concurrent_hash_map(compare, a) - { - try_call( [&] { - internal_copy(il.begin(), il.end(), il.size()); - }).on_exception( [&] { - this->clear(); - }); - } - - concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type& a ) - : concurrent_hash_map(il, hash_compare_type(), a) {} - - // Assignment - concurrent_hash_map& operator=( const concurrent_hash_map &table ) { - if( this != &table ) { - clear(); - copy_assign_allocators(this->my_allocator, table.my_allocator); - internal_copy(table); - } - return *this; - } - - // Move Assignment - concurrent_hash_map& operator=( concurrent_hash_map &&table ) { - if( this != &table ) { - using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; - using is_equal_type = typename node_allocator_traits::is_always_equal; - move_assign_allocators(this->my_allocator, table.my_allocator); - internal_move_assign(std::move(table), tbb::detail::disjunction<is_equal_type, pocma_type>()); - } - return *this; - } - - // Assignment - concurrent_hash_map& operator=( std::initializer_list<value_type> il ) { - clear(); - internal_copy(il.begin(), il.end(), il.size()); - return *this; - } - - // Rehashes and optionally resizes the whole table. - /** Useful to optimize performance before or after concurrent operations. - Also enables using of find() and count() concurrent methods in serial context. 
*/ - void rehash(size_type sz = 0) { - this->reserve(sz); // TODO: add reduction of number of buckets as well - hashcode_type mask = this->my_mask.load(std::memory_order_relaxed); - hashcode_type b = (mask+1)>>1; // size or first index of the last segment - __TBB_ASSERT((b&(b-1))==0, nullptr); // zero or power of 2 - bucket *bp = this->get_bucket( b ); // only the last segment should be scanned for rehashing - for(; b <= mask; b++, bp++ ) { - node_base *n = bp->node_list.load(std::memory_order_relaxed); - __TBB_ASSERT( this->is_valid(n) || n == empty_rehashed || n == rehash_req, "Broken detail structure" ); - __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); - if( n == rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one - hashcode_type h = b; bucket *b_old = bp; - do { - __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); - hashcode_type m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit - b_old = this->get_bucket( h &= m ); - } while( b_old->node_list.load(std::memory_order_relaxed) == rehash_req ); - // now h - is index of the root rehashed bucket b_old - this->mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments - node_base* prev = nullptr; - node_base* curr = b_old->node_list.load(std::memory_order_relaxed); - while (this->is_valid(curr)) { - hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); - - if ((curr_node_hash & mask) != h) { // should be rehashed - node_base* next = curr->next; - // exclude from b_old - if (prev == nullptr) { - b_old->node_list.store(curr->next, std::memory_order_relaxed); - } else { - prev->next = curr->next; - } - bucket *b_new = this->get_bucket(curr_node_hash & mask); - __TBB_ASSERT(b_new->node_list.load(std::memory_order_relaxed) != rehash_req, "hash() function changed for key in table or detail error" ); - this->add_to_bucket(b_new, curr); - curr = next; - } else { - prev = curr; - curr = curr->next; - } - } - } - } - } - - // Clear table - void clear() { - hashcode_type m = this->my_mask.load(std::memory_order_relaxed); - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - this->my_size.store(0, std::memory_order_relaxed); - segment_index_type s = this->segment_index_of( m ); - __TBB_ASSERT( s+1 == this->pointers_per_table || !this->my_table[s+1].load(std::memory_order_relaxed), "wrong mask or concurrent grow" ); - do { - __TBB_ASSERT(this->is_valid(this->my_table[s].load(std::memory_order_relaxed)), "wrong mask or concurrent grow" ); - segment_ptr_type buckets_ptr = this->my_table[s].load(std::memory_order_relaxed); - size_type sz = this->segment_size( s ? s : 1 ); - for( segment_index_type i = 0; i < sz; i++ ) - for( node_base *n = buckets_ptr[i].node_list.load(std::memory_order_relaxed); - this->is_valid(n); n = buckets_ptr[i].node_list.load(std::memory_order_relaxed) ) - { - buckets_ptr[i].node_list.store(n->next, std::memory_order_relaxed); - delete_node( n ); - } - this->delete_segment(s); - } while(s-- > 0); - this->my_mask.store(this->embedded_buckets - 1, std::memory_order_relaxed); - } - - // Clear table and destroy it. 
- ~concurrent_hash_map() { clear(); } - - //------------------------------------------------------------------------ - // Parallel algorithm support - //------------------------------------------------------------------------ - range_type range( size_type grainsize=1 ) { - return range_type( *this, grainsize ); - } - const_range_type range( size_type grainsize=1 ) const { - return const_range_type( *this, grainsize ); - } - - //------------------------------------------------------------------------ - // STL support - not thread-safe methods - //------------------------------------------------------------------------ - iterator begin() { return iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } - const_iterator begin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } - const_iterator cbegin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } - iterator end() { return iterator( *this, 0, 0, 0 ); } - const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); } - const_iterator cend() const { return const_iterator( *this, 0, 0, 0 ); } - std::pair<iterator, iterator> equal_range( const Key& key ) { return internal_equal_range( key, end() ); } - std::pair<const_iterator, const_iterator> equal_range( const Key& key ) const { return internal_equal_range( key, end() ); } - - // Number of items in table. - size_type size() const { return this->my_size.load(std::memory_order_acquire); } - - // True if size()==0. - __TBB_nodiscard bool empty() const { return size() == 0; } - - // Upper bound on size. - size_type max_size() const { - return allocator_traits_type::max_size(base_type::get_allocator()); - } - - // Returns the current number of buckets - size_type bucket_count() const { return this->my_mask.load(std::memory_order_relaxed) + 1; } - - // return allocator object - allocator_type get_allocator() const { return base_type::get_allocator(); } - - // swap two instances. Iterators are invalidated - void swap(concurrent_hash_map& table) { - using pocs_type = typename node_allocator_traits::propagate_on_container_swap; - using is_equal_type = typename node_allocator_traits::is_always_equal; - swap_allocators(this->my_allocator, table.my_allocator); - internal_swap(table, tbb::detail::disjunction<pocs_type, is_equal_type>()); - } - - //------------------------------------------------------------------------ - // concurrent map operations - //------------------------------------------------------------------------ - - // Return count of items (0 or 1) - size_type count( const Key &key ) const { - return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node ); - } - - // Find item and acquire a read lock on the item. - /** Return true if item is found, false otherwise. */ - bool find( const_accessor &result, const Key &key ) const { - result.release(); - return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, &result, /*write=*/false, &do_not_allocate_node ); - } - - // Find item and acquire a write lock on the item. - /** Return true if item is found, false otherwise. 
*/ - bool find( accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/false, key, nullptr, &result, /*write=*/true, &do_not_allocate_node ); - } - - // Insert item (if not already present) and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/true, key, nullptr, &result, /*write=*/false, &allocate_node_default_construct ); - } - - // Insert item (if not already present) and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/true, key, nullptr, &result, /*write=*/true, &allocate_node_default_construct ); - } - - // Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, const value_type &value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct ); - } - - // Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, const value_type &value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct ); - } - - // Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - bool insert( const value_type &value ) { - return lookup(/*insert*/true, value.first, &value.second, nullptr, /*write=*/false, &allocate_node_copy_construct ); - } - - // Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, value_type && value ) { - return generic_move_insert(result, std::move(value)); - } - - // Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, value_type && value ) { - return generic_move_insert(result, std::move(value)); - } - - // Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - bool insert( value_type && value ) { - return generic_move_insert(accessor_not_used(), std::move(value)); - } - - // Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - template <typename... Args> - bool emplace( const_accessor &result, Args&&... args ) { - return generic_emplace(result, std::forward<Args>(args)...); - } - - // Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - template <typename... Args> - bool emplace( accessor &result, Args&&... args ) { - return generic_emplace(result, std::forward<Args>(args)...); - } - - // Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - template <typename... Args> - bool emplace( Args&&... 
args ) { - return generic_emplace(accessor_not_used(), std::forward<Args>(args)...); - } - - // Insert range [first, last) - template <typename I> - void insert( I first, I last ) { - for ( ; first != last; ++first ) - insert( *first ); - } - - // Insert initializer list - void insert( std::initializer_list<value_type> il ) { - insert( il.begin(), il.end() ); - } - - // Erase item. - /** Return true if item was erased by particularly this call. */ - bool erase( const Key &key ) { - node_base *erase_node; - hashcode_type const hash = my_hash_compare.hash(key); - hashcode_type mask = this->my_mask.load(std::memory_order_acquire); - restart: - {//lock scope - // get bucket - bucket_accessor b( this, hash & mask ); - search: - node_base* prev = nullptr; - erase_node = b()->node_list.load(std::memory_order_relaxed); - while (this->is_valid(erase_node) && !my_hash_compare.equal(key, static_cast<node*>(erase_node)->value().first ) ) { - prev = erase_node; - erase_node = erase_node->next; - } - - if (erase_node == nullptr) { // not found, but mask could be changed - if (this->check_mask_race(hash, mask)) - goto restart; - return false; - } else if (!b.is_writer() && !b.upgrade_to_writer()) { - if (this->check_mask_race(hash, mask)) // contended upgrade, check mask - goto restart; - goto search; - } - - // remove from container - if (prev == nullptr) { - b()->node_list.store(erase_node->next, std::memory_order_relaxed); - } else { - prev->next = erase_node->next; - } - this->my_size--; - } - { - typename node::scoped_type item_locker( erase_node->mutex, /*write=*/true ); - } - // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor! - delete_node(erase_node); // Only one thread can delete it due to write lock on the bucket - return true; - } - - // Erase item by const_accessor. - /** Return true if item was erased by particularly this call. */ - bool erase( const_accessor& item_accessor ) { - return exclude( item_accessor ); - } - - // Erase item by accessor. - /** Return true if item was erased by particularly this call. */ - bool erase( accessor& item_accessor ) { - return exclude( item_accessor ); - } - -protected: - // Insert or find item and optionally acquire a lock on the item. - bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write, node* (*allocate_node)(bucket_allocator_type&, - const Key&, const T*), node *tmp_n = 0) - { - __TBB_ASSERT( !result || !result->my_node, nullptr ); - bool return_value; - hashcode_type const h = my_hash_compare.hash( key ); - hashcode_type m = this->my_mask.load(std::memory_order_acquire); - segment_index_type grow_segment = 0; - node *n; - restart: - {//lock scope - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - return_value = false; - // get bucket - bucket_accessor b( this, h & m ); - // find a node - n = search_bucket( key, b() ); - if( op_insert ) { - // [opt] insert a key - if( !n ) { - if( !tmp_n ) { - tmp_n = allocate_node(base_type::get_allocator(), key, t); - } - if( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion - // Rerun search_list, in case another thread inserted the item during the upgrade. - n = search_bucket( key, b() ); - if( this->is_valid(n) ) { // unfortunately, it did - b.downgrade_to_reader(); - goto exists; - } - } - if( this->check_mask_race(h, m) ) - goto restart; // b.release() is done in ~b(). 
- // insert and set flag to grow the container - grow_segment = this->insert_new_node( b(), n = tmp_n, m ); - tmp_n = 0; - return_value = true; - } - } else { // find or count - if( !n ) { - if( this->check_mask_race( h, m ) ) - goto restart; // b.release() is done in ~b(). TODO: replace by continue - return false; - } - return_value = true; - } - exists: - if( !result ) goto check_growth; - // TODO: the following seems as generic/regular operation - // acquire the item - if( !result->try_acquire( n->mutex, write ) ) { - for( tbb::detail::atomic_backoff backoff(true);; ) { - if( result->try_acquire( n->mutex, write ) ) break; - if( !backoff.bounded_pause() ) { - // the wait takes really long, restart the operation - b.release(); - __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" ); - yield(); - m = this->my_mask.load(std::memory_order_acquire); - goto restart; - } - } - } - }//lock scope - result->my_node = n; - result->my_hash = h; - check_growth: - // [opt] grow the container - if( grow_segment ) { - this->enable_segment( grow_segment ); - } - if( tmp_n ) // if op_insert only - delete_node( tmp_n ); - return return_value; - } - - struct accessor_not_used { void release(){}}; - friend const_accessor* accessor_location( accessor_not_used const& ){ return nullptr;} - friend const_accessor* accessor_location( const_accessor & a ) { return &a;} - - friend bool is_write_access_needed( accessor const& ) { return true;} - friend bool is_write_access_needed( const_accessor const& ) { return false;} - friend bool is_write_access_needed( accessor_not_used const& ) { return false;} - - template <typename Accessor> - bool generic_move_insert( Accessor && result, value_type && value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct ); - } - - template <typename Accessor, typename... Args> - bool generic_emplace( Accessor && result, Args &&... 
args ) { - result.release(); - node * node_ptr = create_node(base_type::get_allocator(), std::forward<Args>(args)...); - return lookup(/*insert*/true, node_ptr->value().first, nullptr, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr ); - } - - // delete item by accessor - bool exclude( const_accessor &item_accessor ) { - __TBB_ASSERT( item_accessor.my_node, nullptr ); - node_base *const exclude_node = item_accessor.my_node; - hashcode_type const hash = item_accessor.my_hash; - hashcode_type mask = this->my_mask.load(std::memory_order_acquire); - do { - // get bucket - bucket_accessor b( this, hash & mask, /*writer=*/true ); - node_base* prev = nullptr; - node_base* curr = b()->node_list.load(std::memory_order_relaxed); - - while (curr && curr != exclude_node) { - prev = curr; - curr = curr->next; - } - - if (curr == nullptr) { // someone else was first - if (this->check_mask_race(hash, mask)) - continue; - item_accessor.release(); - return false; - } - __TBB_ASSERT( curr == exclude_node, nullptr ); - // remove from container - if (prev == nullptr) { - b()->node_list.store(curr->next, std::memory_order_relaxed); - } else { - prev->next = curr->next; - } - - this->my_size--; - break; - } while(true); - if (!item_accessor.is_writer()) { // need to get exclusive lock - item_accessor.upgrade_to_writer(); // return value means nothing here - } - - item_accessor.release(); - delete_node(exclude_node); // Only one thread can delete it - return true; - } - - // Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) - template <typename I> - std::pair<I, I> internal_equal_range( const Key& key, I end_ ) const { - hashcode_type h = my_hash_compare.hash( key ); - hashcode_type m = this->my_mask.load(std::memory_order_relaxed); - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - h &= m; - bucket *b = this->get_bucket( h ); - while ( b->node_list.load(std::memory_order_relaxed) == rehash_req ) { - m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit - b = this->get_bucket( h &= m ); - } - node *n = search_bucket( key, b ); - if( !n ) - return std::make_pair(end_, end_); - iterator lower(*this, h, b, n), upper(lower); - return std::make_pair(lower, ++upper); - } - - // Copy "source" to *this, where *this must start out empty. - void internal_copy( const concurrent_hash_map& source ) { - hashcode_type mask = source.my_mask.load(std::memory_order_relaxed); - if( this->my_mask.load(std::memory_order_relaxed) == mask ) { // optimized version - this->reserve(source.my_size.load(std::memory_order_relaxed)); // TODO: load_factor? 
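            // Editorial note (not from the upstream header) on the loop below: k & (k-2) is zero
            // exactly at segment base indices (0, 2, 4, 8, 16, ...), so the dst/src bucket
            // pointers are re-read from the segment tables only at segment boundaries and are
            // advanced contiguously in between.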
- bucket *dst = 0, *src = 0; - bool rehash_required = false; - for( hashcode_type k = 0; k <= mask; k++ ) { - if( k & (k-2) ) ++dst,src++; // not the beginning of a segment - else { dst = this->get_bucket( k ); src = source.get_bucket( k ); } - __TBB_ASSERT( dst->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); - node *n = static_cast<node*>( src->node_list.load(std::memory_order_relaxed) ); - if( n == rehash_req ) { // source is not rehashed, items are in previous buckets - rehash_required = true; - dst->node_list.store(rehash_req, std::memory_order_relaxed); - } else for(; n; n = static_cast<node*>( n->next ) ) { - node* node_ptr = create_node(base_type::get_allocator(), n->value().first, n->value().second); - this->add_to_bucket( dst, node_ptr); - this->my_size.fetch_add(1, std::memory_order_relaxed); - } - } - if( rehash_required ) rehash(); - } else internal_copy(source.begin(), source.end(), source.my_size.load(std::memory_order_relaxed)); - } - - template <typename I> - void internal_copy( I first, I last, size_type reserve_size ) { - this->reserve(reserve_size); // TODO: load_factor? - hashcode_type m = this->my_mask.load(std::memory_order_relaxed); - for(; first != last; ++first) { - hashcode_type h = my_hash_compare.hash( (*first).first ); - bucket *b = this->get_bucket( h & m ); - __TBB_ASSERT( b->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); - node* node_ptr = create_node(base_type::get_allocator(), (*first).first, (*first).second); - this->add_to_bucket( b, node_ptr ); - ++this->my_size; // TODO: replace by non-atomic op - } - } - - void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type&, - /*is_always_equal=*/std::true_type ) - { - this->internal_move(std::move(other)); - } - - void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type& a, - /*is_always_equal=*/std::false_type ) - { - if (a == other.get_allocator()){ - this->internal_move(std::move(other)); - } else { - try_call( [&] { - internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), - other.size()); - }).on_exception( [&] { - this->clear(); - }); - } - } - - void internal_move_assign( concurrent_hash_map&& other, - /*is_always_equal || POCMA = */std::true_type) - { - this->internal_move(std::move(other)); - } - - void internal_move_assign(concurrent_hash_map&& other, /*is_always_equal=*/ std::false_type) { - if (this->my_allocator == other.my_allocator) { - this->internal_move(std::move(other)); - } else { - //do per element move - internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), - other.size()); - } - } - - void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::true_type) { - this->internal_swap_content(other); - } - - void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::false_type) { - __TBB_ASSERT(this->my_allocator == other.my_allocator, nullptr); - this->internal_swap_content(other); - } - - // Fast find when no concurrent erasure is used. For internal use inside TBB only! - /** Return pointer to item with given key, or nullptr if no such item exists. - Must not be called concurrently with erasure operations. 
*/ - const_pointer internal_fast_find( const Key& key ) const { - hashcode_type h = my_hash_compare.hash( key ); - hashcode_type m = this->my_mask.load(std::memory_order_acquire); - node *n; - restart: - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - bucket *b = this->get_bucket( h & m ); - // TODO: actually, notification is unnecessary here, just hiding double-check - if( b->node_list.load(std::memory_order_acquire) == rehash_req ) - { - typename bucket::scoped_type lock; - if( lock.try_acquire( b->mutex, /*write=*/true ) ) { - if( b->node_list.load(std::memory_order_relaxed) == rehash_req) - const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing - } - else lock.acquire( b->mutex, /*write=*/false ); - __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req,nullptr); - } - n = search_bucket( key, b ); - if( n ) - return n->storage(); - else if( this->check_mask_race( h, m ) ) - goto restart; - return 0; - } -}; - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename It, - typename HashCompare = tbb_hash_compare<iterator_key_t<It>>, - typename Alloc = tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<HashCompare>>> -concurrent_hash_map( It, It, HashCompare = HashCompare(), Alloc = Alloc() ) --> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, HashCompare, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_hash_map( It, It, Alloc ) --> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, tbb_hash_compare<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, - typename HashCompare = tbb_hash_compare<std::remove_const_t<Key>>, - typename Alloc = tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<HashCompare>>> -concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, HashCompare = HashCompare(), Alloc = Alloc() ) --> concurrent_hash_map<std::remove_const_t<Key>, T, HashCompare, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_hash_map<std::remove_const_t<Key>, T, tbb_hash_compare<std::remove_const_t<Key>>, Alloc>; - -#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ - -template <typename Key, typename T, typename HashCompare, typename A1, typename A2> -inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) { - if(a.size() != b.size()) return false; - typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end()); - typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end()); - for(; i != i_end; ++i) { - j = b.equal_range(i->first).first; - if( j == j_end || !(i->second == j->second) ) return false; - } - return true; -} - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template <typename Key, typename T, typename HashCompare, typename A1, typename A2> -inline bool operator!=(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) -{ return !(a == b); } -#endif // !__TBB_CPP20_COMPARISONS_PRESENT 
- -template <typename Key, typename T, typename HashCompare, typename A> -inline void swap(concurrent_hash_map<Key, T, HashCompare, A> &a, concurrent_hash_map<Key, T, HashCompare, A> &b) -{ a.swap( b ); } - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::split; - using detail::d1::concurrent_hash_map; - using detail::d1::tbb_hash_compare; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_concurrent_hash_map_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_hash_map_H +#define __TBB_concurrent_hash_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_assert.h" +#include "detail/_allocator_traits.h" +#include "detail/_containers_helpers.h" +#include "detail/_template_helpers.h" +#include "detail/_hash_compare.h" +#include "detail/_range_common.h" +#include "tbb_allocator.h" +#include "spin_rw_mutex.h" + +#include <atomic> +#include <initializer_list> +#include <tuple> +#include <iterator> +#include <utility> // Need std::pair +#include <cstring> // Need std::memset + +namespace tbb { +namespace detail { +namespace d1 { + +struct hash_map_node_base : no_copy { + using mutex_type = spin_rw_mutex; + // Scoped lock type for mutex + using scoped_type = mutex_type::scoped_lock; + // Next node in chain + hash_map_node_base* next; + mutex_type mutex; +}; + +// Incompleteness flag value +static hash_map_node_base* const rehash_req = reinterpret_cast<hash_map_node_base*>(std::size_t(3)); +// Rehashed empty bucket flag +static hash_map_node_base* const empty_rehashed = reinterpret_cast<hash_map_node_base*>(std::size_t(0)); + +// base class of concurrent_hash_map + +template <typename Allocator> +class hash_map_base { +public: + using size_type = std::size_t; + using hashcode_type = std::size_t; + using segment_index_type = std::size_t; + using node_base = hash_map_node_base; + + struct bucket : no_copy { + using mutex_type = spin_rw_mutex; + using scoped_type = mutex_type::scoped_lock; + + bucket() : node_list(nullptr) {} + bucket( node_base* ptr ) : node_list(ptr) {} + + mutex_type mutex; + std::atomic<node_base*> node_list; + }; + + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using bucket_allocator_type = typename allocator_traits_type::template rebind_alloc<bucket>; + using bucket_allocator_traits = tbb::detail::allocator_traits<bucket_allocator_type>; + + // Count of segments in the first block + static constexpr size_type embedded_block = 1; + // Count of segments in the first block + static constexpr size_type embedded_buckets = 1 << embedded_block; + // Count of segments in the first block + static constexpr size_type first_block = 8; //including embedded_block. 
perfect with bucket size 16, so the allocations are power of 4096 + // Size of a pointer / table size + static constexpr size_type pointers_per_table = sizeof(segment_index_type) * 8; // one segment per bit + + using segment_ptr_type = bucket*; + using atomic_segment_type = std::atomic<segment_ptr_type>; + using segments_table_type = atomic_segment_type[pointers_per_table]; + + hash_map_base( const allocator_type& alloc ) : my_allocator(alloc), my_mask(embedded_buckets - 1), my_size(0) { + for (size_type i = 0; i != embedded_buckets; ++i) { + my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); + } + + for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) { + auto argument = segment_index < embedded_block ? my_embedded_segment + segment_base(segment_index) : nullptr; + my_table[segment_index].store(argument, std::memory_order_relaxed); + } + + __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); + } + + // segment index of given index in the array + static segment_index_type segment_index_of( size_type index ) { + return segment_index_type(tbb::detail::log2( index|1 )); + } + + // the first array index of given segment + static segment_index_type segment_base( segment_index_type k ) { + return (segment_index_type(1) << k & ~segment_index_type(1)); + } + + // segment size except for k == 0 + static size_type segment_size( segment_index_type k ) { + return size_type(1) << k; // fake value for k==0 + } + + // true if ptr is valid pointer + static bool is_valid( void* ptr ) { + return reinterpret_cast<uintptr_t>(ptr) > uintptr_t(63); + } + + template <typename... Args> + void init_buckets_impl( segment_ptr_type ptr, size_type sz, Args&&... args ) { + for (size_type i = 0; i < sz; ++i) { + bucket_allocator_traits::construct(my_allocator, ptr + i, std::forward<Args>(args)...); + } + } + + // Initialize buckets + void init_buckets( segment_ptr_type ptr, size_type sz, bool is_initial ) { + if (is_initial) { + init_buckets_impl(ptr, sz); + } else { + init_buckets_impl(ptr, sz, reinterpret_cast<node_base*>(rehash_req)); + } + } + + // Add node n to bucket b + static void add_to_bucket( bucket* b, node_base* n ) { + __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); + n->next = b->node_list.load(std::memory_order_relaxed); + b->node_list.store(n, std::memory_order_relaxed); // its under lock and flag is set + } + + const bucket_allocator_type& get_allocator() const { + return my_allocator; + } + + bucket_allocator_type& get_allocator() { + return my_allocator; + } + + // Enable segment + void enable_segment( segment_index_type k, bool is_initial = false ) { + __TBB_ASSERT( k, "Zero segment must be embedded" ); + size_type sz; + __TBB_ASSERT( !is_valid(my_table[k].load(std::memory_order_relaxed)), "Wrong concurrent assignment"); + if (k >= first_block) { + sz = segment_size(k); + segment_ptr_type ptr = nullptr; + try_call( [&] { + ptr = bucket_allocator_traits::allocate(my_allocator, sz); + } ).on_exception( [&] { + my_table[k].store(nullptr, std::memory_order_relaxed); + }); + + __TBB_ASSERT(ptr, nullptr); + init_buckets(ptr, sz, is_initial); + my_table[k].store(ptr, std::memory_order_release); + sz <<= 1;// double it to get entire capacity of the container + } else { // the first block + __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); + sz = segment_size(first_block); + segment_ptr_type ptr = nullptr; + try_call( [&] { + ptr = 
bucket_allocator_traits::allocate(my_allocator, sz - embedded_buckets); + } ).on_exception( [&] { + my_table[k].store(nullptr, std::memory_order_relaxed); + }); + + __TBB_ASSERT(ptr, nullptr); + init_buckets(ptr, sz - embedded_buckets, is_initial); + ptr -= segment_base(embedded_block); + for(segment_index_type i = embedded_block; i < first_block; i++) // calc the offsets + my_table[i].store(ptr + segment_base(i), std::memory_order_release); + } + my_mask.store(sz-1, std::memory_order_release); + } + + void delete_segment( segment_index_type s ) { + segment_ptr_type buckets_ptr = my_table[s].load(std::memory_order_relaxed); + size_type sz = segment_size( s ? s : 1 ); + + size_type deallocate_size = 0; + + if (s >= first_block) { // the first segment or the next + deallocate_size = sz; + } else if (s == embedded_block && embedded_block != first_block) { + deallocate_size = segment_size(first_block) - embedded_buckets; + } + + for (size_type i = 0; i < deallocate_size; ++i) { + bucket_allocator_traits::destroy(my_allocator, buckets_ptr + i); + } + if (deallocate_size != 0) { + bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, deallocate_size); + } + + if (s >= embedded_block) my_table[s].store(nullptr, std::memory_order_relaxed); + } + + // Get bucket by (masked) hashcode + bucket *get_bucket( hashcode_type h ) const noexcept { + segment_index_type s = segment_index_of( h ); + h -= segment_base(s); + segment_ptr_type seg = my_table[s].load(std::memory_order_acquire); + __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); + return &seg[h]; + } + + // detail serial rehashing helper + void mark_rehashed_levels( hashcode_type h ) noexcept { + segment_index_type s = segment_index_of( h ); + while (segment_ptr_type seg = my_table[++s].load(std::memory_order_relaxed)) + if( seg[h].node_list.load(std::memory_order_relaxed) == rehash_req ) { + seg[h].node_list.store(empty_rehashed, std::memory_order_relaxed); + mark_rehashed_levels( h + ((hashcode_type)1<<s) ); // optimized segment_base(s) + } + } + + // Check for mask race + // Splitting into two functions should help inlining + inline bool check_mask_race( const hashcode_type h, hashcode_type &m ) const { + hashcode_type m_now, m_old = m; + m_now = my_mask.load(std::memory_order_acquire); + if (m_old != m_now) { + return check_rehashing_collision(h, m_old, m = m_now); + } + return false; + } + + // Process mask race, check for rehashing collision + bool check_rehashing_collision( const hashcode_type h, hashcode_type m_old, hashcode_type m ) const { + __TBB_ASSERT(m_old != m, nullptr); // TODO?: m arg could be optimized out by passing h = h&m + if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event + // condition above proves that 'h' has some other bits set beside 'm_old' + // find next applicable mask after m_old //TODO: look at bsl instruction + for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size + ; + m_old = (m_old<<1) - 1; // get full mask from a bit + __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, nullptr); + // check whether it is rehashing/ed + if( get_bucket(h & m_old)->node_list.load(std::memory_order_acquire) != rehash_req ) { + return true; + } + } + return false; + } + + // Insert a node and check for load factor. @return segment index to enable. 
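// Editorial note (illustrative, not part of the diff; inferred from enable_segment above and
// insert_new_node below): the table grows when the item count reaches the current mask, i.e. the
// load factor is kept at roughly one item per bucket. The segment chosen for growth is
// log2(mask + 1), so once the first_block segments are in place each growth step doubles the
// bucket count (with the defaults above the mask progresses 1 -> 255 -> 511 -> 1023 -> ...).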
+ segment_index_type insert_new_node( bucket *b, node_base *n, hashcode_type mask ) { + size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted + add_to_bucket( b, n ); + // check load factor + if( sz >= mask ) { // TODO: add custom load_factor + segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of + __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated"); + static const segment_ptr_type is_allocating = segment_ptr_type(2);; + segment_ptr_type disabled = nullptr; + if (!(my_table[new_seg].load(std::memory_order_acquire)) + && my_table[new_seg].compare_exchange_strong(disabled, is_allocating)) + return new_seg; // The value must be processed + } + return 0; + } + + // Prepare enough segments for number of buckets + void reserve(size_type buckets) { + if( !buckets-- ) return; + bool is_initial = !my_size.load(std::memory_order_relaxed); + for (size_type m = my_mask.load(std::memory_order_relaxed); buckets > m; + m = my_mask.load(std::memory_order_relaxed)) + { + enable_segment( segment_index_of( m+1 ), is_initial ); + } + } + + // Swap hash_map_bases + void internal_swap_content(hash_map_base &table) { + using std::swap; + swap_atomics_relaxed(my_mask, table.my_mask); + swap_atomics_relaxed(my_size, table.my_size); + + for(size_type i = 0; i < embedded_buckets; i++) { + auto temp = my_embedded_segment[i].node_list.load(std::memory_order_relaxed); + my_embedded_segment[i].node_list.store(table.my_embedded_segment[i].node_list.load(std::memory_order_relaxed), + std::memory_order_relaxed); + table.my_embedded_segment[i].node_list.store(temp, std::memory_order_relaxed); + } + for(size_type i = embedded_block; i < pointers_per_table; i++) { + auto temp = my_table[i].load(std::memory_order_relaxed); + my_table[i].store(table.my_table[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + table.my_table[i].store(temp, std::memory_order_relaxed); + } + } + + void internal_move(hash_map_base&& other) { + my_mask.store(other.my_mask.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_mask.store(embedded_buckets - 1, std::memory_order_relaxed); + + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(0, std::memory_order_relaxed); + + for (size_type i = 0; i < embedded_buckets; ++i) { + my_embedded_segment[i].node_list.store(other.my_embedded_segment[i].node_list, std::memory_order_relaxed); + other.my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); + } + + for (size_type i = embedded_block; i < pointers_per_table; ++i) { + my_table[i].store(other.my_table[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + other.my_table[i].store(nullptr, std::memory_order_relaxed); + } + } + +protected: + + bucket_allocator_type my_allocator; + // Hash mask = sum of allocated segment sizes - 1 + std::atomic<hashcode_type> my_mask; + // Size of container in stored items + std::atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects + // Zero segment + bucket my_embedded_segment[embedded_buckets]; + // Segment pointers table. 
Also prevents false sharing between my_mask and my_size + segments_table_type my_table; +}; + +template <typename Iterator> +class hash_map_range; + +// Meets requirements of a forward iterator for STL +// Value is either the T or const T type of the container. +template <typename Container, typename Value> +class hash_map_iterator { + using map_type = Container; + using node = typename Container::node; + using map_base = typename Container::base_type; + using node_base = typename map_base::node_base; + using bucket = typename map_base::bucket; +public: + using value_type = Value; + using size_type = typename Container::size_type; + using difference_type = typename Container::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::forward_iterator_tag; + + // Construct undefined iterator + hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {} + hash_map_iterator( const hash_map_iterator<Container, typename Container::value_type>& other ) : + my_map(other.my_map), + my_index(other.my_index), + my_bucket(other.my_bucket), + my_node(other.my_node) + {} + + hash_map_iterator& operator=( const hash_map_iterator<Container, typename Container::value_type>& other ) { + my_map = other.my_map; + my_index = other.my_index; + my_bucket = other.my_bucket; + my_node = other.my_node; + return *this; + } + + Value& operator*() const { + __TBB_ASSERT( map_base::is_valid(my_node), "iterator uninitialized or at end of container?" ); + return my_node->value(); + } + + Value* operator->() const {return &operator*();} + + hash_map_iterator& operator++() { + my_node = static_cast<node*>( my_node->next ); + if( !my_node ) advance_to_next_bucket(); + return *this; + } + + // Post increment + hash_map_iterator operator++(int) { + hash_map_iterator old(*this); + operator++(); + return old; + } +private: + template <typename C, typename T, typename U> + friend bool operator==( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename T, typename U> + friend bool operator!=( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename T, typename U> + friend ptrdiff_t operator-( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename U> + friend class hash_map_iterator; + + template <typename I> + friend class hash_map_range; + + void advance_to_next_bucket() { // TODO?: refactor to iterator_base class + size_t k = my_index+1; + __TBB_ASSERT( my_bucket, "advancing an invalid iterator?"); + while (k <= my_map->my_mask.load(std::memory_order_relaxed)) { + // Following test uses 2's-complement wizardry + if( k&(k-2) ) // not the beginning of a segment + ++my_bucket; + else my_bucket = my_map->get_bucket( k ); + my_node = static_cast<node*>( my_bucket->node_list.load(std::memory_order_relaxed) ); + if( map_base::is_valid(my_node) ) { + my_index = k; return; + } + ++k; + } + my_bucket = 0; my_node = 0; my_index = k; // the end + } + + template <typename Key, typename T, typename HashCompare, typename A> + friend class concurrent_hash_map; + + hash_map_iterator( const Container &map, std::size_t index, const bucket *b, node_base *n ) : + my_map(&map), my_index(index), my_bucket(b), my_node(static_cast<node*>(n)) + { + if( b && !map_base::is_valid(n) ) + advance_to_next_bucket(); + } + + // concurrent_hash_map over which we are iterating. 
+ const Container *my_map; + // Index in hash table for current item + size_t my_index; + // Pointer to bucket + const bucket* my_bucket; + // Pointer to node that has current item + node* my_node; +}; + +template <typename Container, typename T, typename U> +bool operator==( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { + return i.my_node == j.my_node && i.my_map == j.my_map; +} + +template <typename Container, typename T, typename U> +bool operator!=( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { + return i.my_node != j.my_node || i.my_map != j.my_map; +} + +// Range class used with concurrent_hash_map +template <typename Iterator> +class hash_map_range { + using map_type = typename Iterator::map_type; +public: + // Type for size of a range + using size_type = std::size_t; + using value_type = typename Iterator::value_type; + using reference = typename Iterator::reference; + using difference_type = typename Iterator::difference_type; + using iterator = Iterator; + + // True if range is empty. + bool empty() const {return my_begin == my_end;} + + // True if range can be partitioned into two subranges. + bool is_divisible() const { + return my_midpoint != my_end; + } + + // Split range. + hash_map_range( hash_map_range& r, split ) : + my_end(r.my_end), + my_grainsize(r.my_grainsize) + { + r.my_end = my_begin = r.my_midpoint; + __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); + __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); + set_midpoint(); + r.set_midpoint(); + } + + // Init range with container and grainsize specified + hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : + my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list.load(std::memory_order_relaxed) ) ), + my_end( Iterator( map, map.my_mask.load(std::memory_order_relaxed) + 1, 0, 0 ) ), + my_grainsize( grainsize_ ) + { + __TBB_ASSERT( grainsize_>0, "grainsize must be positive" ); + set_midpoint(); + } + + const Iterator begin() const { return my_begin; } + const Iterator end() const { return my_end; } + // The grain size for this range. + size_type grainsize() const { return my_grainsize; } + +private: + Iterator my_begin; + Iterator my_end; + mutable Iterator my_midpoint; + size_t my_grainsize; + // Set my_midpoint to point approximately half way between my_begin and my_end. 
+ void set_midpoint() const; + template <typename U> friend class hash_map_range; +}; + +template <typename Iterator> +void hash_map_range<Iterator>::set_midpoint() const { + // Split by groups of nodes + size_t m = my_end.my_index-my_begin.my_index; + if( m > my_grainsize ) { + m = my_begin.my_index + m/2u; + auto b = my_begin.my_map->get_bucket(m); + my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list.load(std::memory_order_relaxed)); + } else { + my_midpoint = my_end; + } + __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index, + "my_begin is after my_midpoint" ); + __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index, + "my_midpoint is after my_end" ); + __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end, + "[my_begin, my_midpoint) range should not be empty" ); +} + +template <typename Key, typename T, + typename HashCompare = tbb_hash_compare<Key>, + typename Allocator = tbb_allocator<std::pair<const Key, T>>> +class concurrent_hash_map : protected hash_map_base<Allocator> { + template <typename Container, typename Value> + friend class hash_map_iterator; + + template <typename I> + friend class hash_map_range; + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; +public: + using base_type = hash_map_base<Allocator>; + using key_type = Key; + using mapped_type = T; + // type_identity is needed to disable implicit deduction guides for std::initializer_list constructors + // and copy/move constructor with explicit allocator argument + using allocator_type = tbb::detail::type_identity_t<Allocator>; + using hash_compare_type = tbb::detail::type_identity_t<HashCompare>; + using value_type = std::pair<const Key, T>; + using size_type = typename base_type::size_type; + using difference_type = std::ptrdiff_t; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using reference = value_type&; + using const_reference = const value_type&; + using iterator = hash_map_iterator<concurrent_hash_map, value_type>; + using const_iterator = hash_map_iterator<concurrent_hash_map, const value_type>; + using range_type = hash_map_range<iterator>; + using const_range_type = hash_map_range<const_iterator>; + +protected: + static_assert(std::is_same<value_type, typename Allocator::value_type>::value, + "value_type of the container must be the same as its allocator's"); + + friend class const_accessor; + class node; + using segment_index_type = typename base_type::segment_index_type; + using segment_ptr_type = typename base_type::segment_ptr_type; + using node_base = typename base_type::node_base; + using bucket = typename base_type::bucket; + using hashcode_type = typename base_type::hashcode_type; + using bucket_allocator_type = typename base_type::bucket_allocator_type; + using node_allocator_type = typename base_type::allocator_traits_type::template rebind_alloc<node>; + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + hash_compare_type my_hash_compare; + + class node : public node_base { + public: + node() {} + ~node() {} + pointer storage() { return &my_value; } + value_type& value() { return *storage(); } + private: + union { + value_type my_value; + }; + }; + + void delete_node( node_base *n ) { + node_allocator_type node_allocator(this->get_allocator()); + node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)->storage()); + node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)); + 
node_allocator_traits::deallocate(node_allocator, static_cast<node*>(n), 1); + } + + template <typename... Args> + static node* create_node(bucket_allocator_type& allocator, Args&&... args) { + node_allocator_type node_allocator(allocator); + node* node_ptr = node_allocator_traits::allocate(node_allocator, 1); + auto guard = make_raii_guard([&] { + node_allocator_traits::destroy(node_allocator, node_ptr); + node_allocator_traits::deallocate(node_allocator, node_ptr, 1); + }); + + node_allocator_traits::construct(node_allocator, node_ptr); + node_allocator_traits::construct(node_allocator, node_ptr->storage(), std::forward<Args>(args)...); + guard.dismiss(); + return node_ptr; + } + + static node* allocate_node_copy_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ + return create_node(allocator, key, *t); + } + + static node* allocate_node_move_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ + return create_node(allocator, key, std::move(*const_cast<T*>(t))); + } + + static node* allocate_node_default_construct(bucket_allocator_type& allocator, const Key &key, const T * ){ + // Emplace construct an empty T object inside the pair + return create_node(allocator, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + } + + static node* do_not_allocate_node(bucket_allocator_type& , const Key &, const T * ){ + __TBB_ASSERT(false,"this dummy function should not be called"); + return nullptr; + } + + node *search_bucket( const key_type &key, bucket *b ) const { + node *n = static_cast<node*>( b->node_list.load(std::memory_order_relaxed) ); + while (this->is_valid(n) && !my_hash_compare.equal(key, n->value().first)) + n = static_cast<node*>( n->next ); + __TBB_ASSERT(n != rehash_req, "Search can be executed only for rehashed bucket"); + return n; + } + + // bucket accessor is to find, rehash, acquire a lock, and access a bucket + class bucket_accessor : public bucket::scoped_type { + bucket *my_b; + public: + bucket_accessor( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { acquire( base, h, writer ); } + // find a bucket by masked hashcode, optionally rehash, and acquire the lock + inline void acquire( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { + my_b = base->get_bucket( h ); + // TODO: actually, notification is unnecessary here, just hiding double-check + if( my_b->node_list.load(std::memory_order_acquire) == rehash_req + && bucket::scoped_type::try_acquire( my_b->mutex, /*write=*/true ) ) + { + if( my_b->node_list.load(std::memory_order_relaxed) == rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing + } + else bucket::scoped_type::acquire( my_b->mutex, writer ); + __TBB_ASSERT( my_b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); + } + // check whether bucket is locked for write + bool is_writer() { return bucket::scoped_type::m_is_writer; } + // get bucket pointer + bucket *operator() () { return my_b; } + }; + + // TODO refactor to hash_base + void rehash_bucket( bucket *b_new, const hashcode_type hash ) { + __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); + __TBB_ASSERT( hash > 1, "The lowermost buckets can't be rehashed" ); + b_new->node_list.store(empty_rehashed, std::memory_order_release); // mark rehashed + hashcode_type mask = (1u << tbb::detail::log2(hash)) - 1; // get parent mask from the topmost bit + bucket_accessor b_old( this, hash & mask ); + + mask = (mask<<1) | 1; // get full mask 
for new bucket + __TBB_ASSERT( (mask&(mask+1))==0 && (hash & mask) == hash, nullptr ); + restart: + node_base* prev = nullptr; + node_base* curr = b_old()->node_list.load(std::memory_order_acquire); + while (this->is_valid(curr)) { + hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); + + if ((curr_node_hash & mask) == hash) { + if (!b_old.is_writer()) { + if (!b_old.upgrade_to_writer()) { + goto restart; // node ptr can be invalid due to concurrent erase + } + } + node_base* next = curr->next; + // exclude from b_old + if (prev == nullptr) { + b_old()->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + this->add_to_bucket(b_new, curr); + curr = next; + } else { + prev = curr; + curr = curr->next; + } + } + } + +public: + + class accessor; + // Combines data access, locking, and garbage collection. + class const_accessor : private node::scoped_type /*which derived from no_copy*/ { + friend class concurrent_hash_map<Key,T,HashCompare,Allocator>; + friend class accessor; + public: + // Type of value + using value_type = const typename concurrent_hash_map::value_type; + + // True if result is empty. + bool empty() const { return !my_node; } + + // Set to null + void release() { + if( my_node ) { + node::scoped_type::release(); + my_node = 0; + } + } + + // Return reference to associated value in hash table. + const_reference operator*() const { + __TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); + return my_node->value(); + } + + // Return pointer to associated value in hash table. + const_pointer operator->() const { + return &operator*(); + } + + // Create empty result + const_accessor() : my_node(nullptr) {} + + // Destroy result after releasing the underlying reference. + ~const_accessor() { + my_node = nullptr; // scoped lock's release() is called in its destructor + } + protected: + bool is_writer() { return node::scoped_type::m_is_writer; } + node *my_node; + hashcode_type my_hash; + }; + + // Allows write access to elements and combines data access, locking, and garbage collection. + class accessor: public const_accessor { + public: + // Type of value + using value_type = typename concurrent_hash_map::value_type; + + // Return reference to associated value in hash table. + reference operator*() const { + __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" ); + return this->my_node->value(); + } + + // Return pointer to associated value in hash table. + pointer operator->() const { + return &operator*(); + } + }; + + explicit concurrent_hash_map( const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : base_type(a) + , my_hash_compare(compare) + {} + + concurrent_hash_map() : concurrent_hash_map(hash_compare_type()) {} + + explicit concurrent_hash_map( const allocator_type& a ) + : concurrent_hash_map(hash_compare_type(), a) + {} + + // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. 
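// Editorial note (illustrative usage, not part of the diff): a hypothetical
//   tbb::concurrent_hash_map<int, std::string> table(1000);
// requests capacity for at least 1000 buckets up front; reserve() rounds the count up to the
// container's internal segment granularity.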
+ concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + this->reserve(n); + } + + concurrent_hash_map( size_type n, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + this->reserve(n); + } + + // Copy constructor + concurrent_hash_map( const concurrent_hash_map &table ) + : concurrent_hash_map(node_allocator_traits::select_on_container_copy_construction(table.get_allocator())) + { + try_call( [&] { + internal_copy(table); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(table); + }).on_exception( [&] { + this->clear(); + }); + } + + // Move constructor + concurrent_hash_map( concurrent_hash_map &&table ) + : concurrent_hash_map(std::move(table.get_allocator())) + { + this->internal_move(std::move(table)); + } + + // Move constructor + concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a ) + : concurrent_hash_map(a) + { + using is_equal_type = typename node_allocator_traits::is_always_equal; + internal_move_construct_with_allocator(std::move(table), a, is_equal_type()); + } + + // Construction with copying iteration range and given allocator instance + template <typename I> + concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(first, last, std::distance(first, last)); + }).on_exception( [&] { + this->clear(); + }); + } + + template <typename I> + concurrent_hash_map( I first, I last, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + try_call( [&] { + internal_copy(first, last, std::distance(first, last)); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( std::initializer_list<value_type> il, const hash_compare_type& compare = hash_compare_type(), const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + try_call( [&] { + internal_copy(il.begin(), il.end(), il.size()); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type& a ) + : concurrent_hash_map(il, hash_compare_type(), a) {} + + // Assignment + concurrent_hash_map& operator=( const concurrent_hash_map &table ) { + if( this != &table ) { + clear(); + copy_assign_allocators(this->my_allocator, table.my_allocator); + internal_copy(table); + } + return *this; + } + + // Move Assignment + concurrent_hash_map& operator=( concurrent_hash_map &&table ) { + if( this != &table ) { + using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; + using is_equal_type = typename node_allocator_traits::is_always_equal; + move_assign_allocators(this->my_allocator, table.my_allocator); + internal_move_assign(std::move(table), tbb::detail::disjunction<is_equal_type, pocma_type>()); + } + return *this; + } + + // Assignment + concurrent_hash_map& operator=( std::initializer_list<value_type> il ) { + clear(); + internal_copy(il.begin(), il.end(), il.size()); + return *this; + } + + // Rehashes and optionally resizes the whole table. + /** Useful to optimize performance before or after concurrent operations. + Also enables using of find() and count() concurrent methods in serial context. 
*/ + void rehash(size_type sz = 0) { + this->reserve(sz); // TODO: add reduction of number of buckets as well + hashcode_type mask = this->my_mask.load(std::memory_order_relaxed); + hashcode_type b = (mask+1)>>1; // size or first index of the last segment + __TBB_ASSERT((b&(b-1))==0, nullptr); // zero or power of 2 + bucket *bp = this->get_bucket( b ); // only the last segment should be scanned for rehashing + for(; b <= mask; b++, bp++ ) { + node_base *n = bp->node_list.load(std::memory_order_relaxed); + __TBB_ASSERT( this->is_valid(n) || n == empty_rehashed || n == rehash_req, "Broken detail structure" ); + __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); + if( n == rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one + hashcode_type h = b; bucket *b_old = bp; + do { + __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); + hashcode_type m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit + b_old = this->get_bucket( h &= m ); + } while( b_old->node_list.load(std::memory_order_relaxed) == rehash_req ); + // now h - is index of the root rehashed bucket b_old + this->mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments + node_base* prev = nullptr; + node_base* curr = b_old->node_list.load(std::memory_order_relaxed); + while (this->is_valid(curr)) { + hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); + + if ((curr_node_hash & mask) != h) { // should be rehashed + node_base* next = curr->next; + // exclude from b_old + if (prev == nullptr) { + b_old->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + bucket *b_new = this->get_bucket(curr_node_hash & mask); + __TBB_ASSERT(b_new->node_list.load(std::memory_order_relaxed) != rehash_req, "hash() function changed for key in table or detail error" ); + this->add_to_bucket(b_new, curr); + curr = next; + } else { + prev = curr; + curr = curr->next; + } + } + } + } + } + + // Clear table + void clear() { + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + this->my_size.store(0, std::memory_order_relaxed); + segment_index_type s = this->segment_index_of( m ); + __TBB_ASSERT( s+1 == this->pointers_per_table || !this->my_table[s+1].load(std::memory_order_relaxed), "wrong mask or concurrent grow" ); + do { + __TBB_ASSERT(this->is_valid(this->my_table[s].load(std::memory_order_relaxed)), "wrong mask or concurrent grow" ); + segment_ptr_type buckets_ptr = this->my_table[s].load(std::memory_order_relaxed); + size_type sz = this->segment_size( s ? s : 1 ); + for( segment_index_type i = 0; i < sz; i++ ) + for( node_base *n = buckets_ptr[i].node_list.load(std::memory_order_relaxed); + this->is_valid(n); n = buckets_ptr[i].node_list.load(std::memory_order_relaxed) ) + { + buckets_ptr[i].node_list.store(n->next, std::memory_order_relaxed); + delete_node( n ); + } + this->delete_segment(s); + } while(s-- > 0); + this->my_mask.store(this->embedded_buckets - 1, std::memory_order_relaxed); + } + + // Clear table and destroy it. 
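// Editorial note (not part of the diff): destroying the map is not thread-safe; make sure no
// other thread is still accessing the table when the destructor runs.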
+ ~concurrent_hash_map() { clear(); } + + //------------------------------------------------------------------------ + // Parallel algorithm support + //------------------------------------------------------------------------ + range_type range( size_type grainsize=1 ) { + return range_type( *this, grainsize ); + } + const_range_type range( size_type grainsize=1 ) const { + return const_range_type( *this, grainsize ); + } + + //------------------------------------------------------------------------ + // STL support - not thread-safe methods + //------------------------------------------------------------------------ + iterator begin() { return iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + const_iterator begin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + const_iterator cbegin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + iterator end() { return iterator( *this, 0, 0, 0 ); } + const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); } + const_iterator cend() const { return const_iterator( *this, 0, 0, 0 ); } + std::pair<iterator, iterator> equal_range( const Key& key ) { return internal_equal_range( key, end() ); } + std::pair<const_iterator, const_iterator> equal_range( const Key& key ) const { return internal_equal_range( key, end() ); } + + // Number of items in table. + size_type size() const { return this->my_size.load(std::memory_order_acquire); } + + // True if size()==0. + __TBB_nodiscard bool empty() const { return size() == 0; } + + // Upper bound on size. + size_type max_size() const { + return allocator_traits_type::max_size(base_type::get_allocator()); + } + + // Returns the current number of buckets + size_type bucket_count() const { return this->my_mask.load(std::memory_order_relaxed) + 1; } + + // return allocator object + allocator_type get_allocator() const { return base_type::get_allocator(); } + + // swap two instances. Iterators are invalidated + void swap(concurrent_hash_map& table) { + using pocs_type = typename node_allocator_traits::propagate_on_container_swap; + using is_equal_type = typename node_allocator_traits::is_always_equal; + swap_allocators(this->my_allocator, table.my_allocator); + internal_swap(table, tbb::detail::disjunction<pocs_type, is_equal_type>()); + } + + //------------------------------------------------------------------------ + // concurrent map operations + //------------------------------------------------------------------------ + + // Return count of items (0 or 1) + size_type count( const Key &key ) const { + return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node ); + } + + // Find item and acquire a read lock on the item. + /** Return true if item is found, false otherwise. */ + bool find( const_accessor &result, const Key &key ) const { + result.release(); + return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, &result, /*write=*/false, &do_not_allocate_node ); + } + + // Find item and acquire a write lock on the item. + /** Return true if item is found, false otherwise. 
*/ + bool find( accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/false, key, nullptr, &result, /*write=*/true, &do_not_allocate_node ); + } + + // Insert item (if not already present) and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/true, key, nullptr, &result, /*write=*/false, &allocate_node_default_construct ); + } + + // Insert item (if not already present) and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/true, key, nullptr, &result, /*write=*/true, &allocate_node_default_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, const value_type &value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, const value_type &value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + bool insert( const value_type &value ) { + return lookup(/*insert*/true, value.first, &value.second, nullptr, /*write=*/false, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, value_type && value ) { + return generic_move_insert(result, std::move(value)); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, value_type && value ) { + return generic_move_insert(result, std::move(value)); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + bool insert( value_type && value ) { + return generic_move_insert(accessor_not_used(), std::move(value)); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + template <typename... Args> + bool emplace( const_accessor &result, Args&&... args ) { + return generic_emplace(result, std::forward<Args>(args)...); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + template <typename... Args> + bool emplace( accessor &result, Args&&... args ) { + return generic_emplace(result, std::forward<Args>(args)...); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + template <typename... Args> + bool emplace( Args&&... 
args ) { + return generic_emplace(accessor_not_used(), std::forward<Args>(args)...); + } + + // Insert range [first, last) + template <typename I> + void insert( I first, I last ) { + for ( ; first != last; ++first ) + insert( *first ); + } + + // Insert initializer list + void insert( std::initializer_list<value_type> il ) { + insert( il.begin(), il.end() ); + } + + // Erase item. + /** Return true if item was erased by particularly this call. */ + bool erase( const Key &key ) { + node_base *erase_node; + hashcode_type const hash = my_hash_compare.hash(key); + hashcode_type mask = this->my_mask.load(std::memory_order_acquire); + restart: + {//lock scope + // get bucket + bucket_accessor b( this, hash & mask ); + search: + node_base* prev = nullptr; + erase_node = b()->node_list.load(std::memory_order_relaxed); + while (this->is_valid(erase_node) && !my_hash_compare.equal(key, static_cast<node*>(erase_node)->value().first ) ) { + prev = erase_node; + erase_node = erase_node->next; + } + + if (erase_node == nullptr) { // not found, but mask could be changed + if (this->check_mask_race(hash, mask)) + goto restart; + return false; + } else if (!b.is_writer() && !b.upgrade_to_writer()) { + if (this->check_mask_race(hash, mask)) // contended upgrade, check mask + goto restart; + goto search; + } + + // remove from container + if (prev == nullptr) { + b()->node_list.store(erase_node->next, std::memory_order_relaxed); + } else { + prev->next = erase_node->next; + } + this->my_size--; + } + { + typename node::scoped_type item_locker( erase_node->mutex, /*write=*/true ); + } + // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor! + delete_node(erase_node); // Only one thread can delete it due to write lock on the bucket + return true; + } + + // Erase item by const_accessor. + /** Return true if item was erased by particularly this call. */ + bool erase( const_accessor& item_accessor ) { + return exclude( item_accessor ); + } + + // Erase item by accessor. + /** Return true if item was erased by particularly this call. */ + bool erase( accessor& item_accessor ) { + return exclude( item_accessor ); + } + +protected: + // Insert or find item and optionally acquire a lock on the item. + bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write, node* (*allocate_node)(bucket_allocator_type&, + const Key&, const T*), node *tmp_n = 0) + { + __TBB_ASSERT( !result || !result->my_node, nullptr ); + bool return_value; + hashcode_type const h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_acquire); + segment_index_type grow_segment = 0; + node *n; + restart: + {//lock scope + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + return_value = false; + // get bucket + bucket_accessor b( this, h & m ); + // find a node + n = search_bucket( key, b() ); + if( op_insert ) { + // [opt] insert a key + if( !n ) { + if( !tmp_n ) { + tmp_n = allocate_node(base_type::get_allocator(), key, t); + } + if( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion + // Rerun search_list, in case another thread inserted the item during the upgrade. + n = search_bucket( key, b() ); + if( this->is_valid(n) ) { // unfortunately, it did + b.downgrade_to_reader(); + goto exists; + } + } + if( this->check_mask_race(h, m) ) + goto restart; // b.release() is done in ~b(). 
+ // insert and set flag to grow the container + grow_segment = this->insert_new_node( b(), n = tmp_n, m ); + tmp_n = 0; + return_value = true; + } + } else { // find or count + if( !n ) { + if( this->check_mask_race( h, m ) ) + goto restart; // b.release() is done in ~b(). TODO: replace by continue + return false; + } + return_value = true; + } + exists: + if( !result ) goto check_growth; + // TODO: the following seems as generic/regular operation + // acquire the item + if( !result->try_acquire( n->mutex, write ) ) { + for( tbb::detail::atomic_backoff backoff(true);; ) { + if( result->try_acquire( n->mutex, write ) ) break; + if( !backoff.bounded_pause() ) { + // the wait takes really long, restart the operation + b.release(); + __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" ); + yield(); + m = this->my_mask.load(std::memory_order_acquire); + goto restart; + } + } + } + }//lock scope + result->my_node = n; + result->my_hash = h; + check_growth: + // [opt] grow the container + if( grow_segment ) { + this->enable_segment( grow_segment ); + } + if( tmp_n ) // if op_insert only + delete_node( tmp_n ); + return return_value; + } + + struct accessor_not_used { void release(){}}; + friend const_accessor* accessor_location( accessor_not_used const& ){ return nullptr;} + friend const_accessor* accessor_location( const_accessor & a ) { return &a;} + + friend bool is_write_access_needed( accessor const& ) { return true;} + friend bool is_write_access_needed( const_accessor const& ) { return false;} + friend bool is_write_access_needed( accessor_not_used const& ) { return false;} + + template <typename Accessor> + bool generic_move_insert( Accessor && result, value_type && value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct ); + } + + template <typename Accessor, typename... Args> + bool generic_emplace( Accessor && result, Args &&... 
args ) { + result.release(); + node * node_ptr = create_node(base_type::get_allocator(), std::forward<Args>(args)...); + return lookup(/*insert*/true, node_ptr->value().first, nullptr, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr ); + } + + // delete item by accessor + bool exclude( const_accessor &item_accessor ) { + __TBB_ASSERT( item_accessor.my_node, nullptr ); + node_base *const exclude_node = item_accessor.my_node; + hashcode_type const hash = item_accessor.my_hash; + hashcode_type mask = this->my_mask.load(std::memory_order_acquire); + do { + // get bucket + bucket_accessor b( this, hash & mask, /*writer=*/true ); + node_base* prev = nullptr; + node_base* curr = b()->node_list.load(std::memory_order_relaxed); + + while (curr && curr != exclude_node) { + prev = curr; + curr = curr->next; + } + + if (curr == nullptr) { // someone else was first + if (this->check_mask_race(hash, mask)) + continue; + item_accessor.release(); + return false; + } + __TBB_ASSERT( curr == exclude_node, nullptr ); + // remove from container + if (prev == nullptr) { + b()->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + + this->my_size--; + break; + } while(true); + if (!item_accessor.is_writer()) { // need to get exclusive lock + item_accessor.upgrade_to_writer(); // return value means nothing here + } + + item_accessor.release(); + delete_node(exclude_node); // Only one thread can delete it + return true; + } + + // Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) + template <typename I> + std::pair<I, I> internal_equal_range( const Key& key, I end_ ) const { + hashcode_type h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + h &= m; + bucket *b = this->get_bucket( h ); + while ( b->node_list.load(std::memory_order_relaxed) == rehash_req ) { + m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit + b = this->get_bucket( h &= m ); + } + node *n = search_bucket( key, b ); + if( !n ) + return std::make_pair(end_, end_); + iterator lower(*this, h, b, n), upper(lower); + return std::make_pair(lower, ++upper); + } + + // Copy "source" to *this, where *this must start out empty. + void internal_copy( const concurrent_hash_map& source ) { + hashcode_type mask = source.my_mask.load(std::memory_order_relaxed); + if( this->my_mask.load(std::memory_order_relaxed) == mask ) { // optimized version + this->reserve(source.my_size.load(std::memory_order_relaxed)); // TODO: load_factor? 
+ bucket *dst = 0, *src = 0; + bool rehash_required = false; + for( hashcode_type k = 0; k <= mask; k++ ) { + if( k & (k-2) ) ++dst,src++; // not the beginning of a segment + else { dst = this->get_bucket( k ); src = source.get_bucket( k ); } + __TBB_ASSERT( dst->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); + node *n = static_cast<node*>( src->node_list.load(std::memory_order_relaxed) ); + if( n == rehash_req ) { // source is not rehashed, items are in previous buckets + rehash_required = true; + dst->node_list.store(rehash_req, std::memory_order_relaxed); + } else for(; n; n = static_cast<node*>( n->next ) ) { + node* node_ptr = create_node(base_type::get_allocator(), n->value().first, n->value().second); + this->add_to_bucket( dst, node_ptr); + this->my_size.fetch_add(1, std::memory_order_relaxed); + } + } + if( rehash_required ) rehash(); + } else internal_copy(source.begin(), source.end(), source.my_size.load(std::memory_order_relaxed)); + } + + template <typename I> + void internal_copy( I first, I last, size_type reserve_size ) { + this->reserve(reserve_size); // TODO: load_factor? + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + for(; first != last; ++first) { + hashcode_type h = my_hash_compare.hash( (*first).first ); + bucket *b = this->get_bucket( h & m ); + __TBB_ASSERT( b->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); + node* node_ptr = create_node(base_type::get_allocator(), (*first).first, (*first).second); + this->add_to_bucket( b, node_ptr ); + ++this->my_size; // TODO: replace by non-atomic op + } + } + + void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type&, + /*is_always_equal=*/std::true_type ) + { + this->internal_move(std::move(other)); + } + + void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type& a, + /*is_always_equal=*/std::false_type ) + { + if (a == other.get_allocator()){ + this->internal_move(std::move(other)); + } else { + try_call( [&] { + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), + other.size()); + }).on_exception( [&] { + this->clear(); + }); + } + } + + void internal_move_assign( concurrent_hash_map&& other, + /*is_always_equal || POCMA = */std::true_type) + { + this->internal_move(std::move(other)); + } + + void internal_move_assign(concurrent_hash_map&& other, /*is_always_equal=*/ std::false_type) { + if (this->my_allocator == other.my_allocator) { + this->internal_move(std::move(other)); + } else { + //do per element move + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), + other.size()); + } + } + + void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::true_type) { + this->internal_swap_content(other); + } + + void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::false_type) { + __TBB_ASSERT(this->my_allocator == other.my_allocator, nullptr); + this->internal_swap_content(other); + } + + // Fast find when no concurrent erasure is used. For internal use inside TBB only! + /** Return pointer to item with given key, or nullptr if no such item exists. + Must not be called concurrently with erasure operations. 
*/ + const_pointer internal_fast_find( const Key& key ) const { + hashcode_type h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_acquire); + node *n; + restart: + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + bucket *b = this->get_bucket( h & m ); + // TODO: actually, notification is unnecessary here, just hiding double-check + if( b->node_list.load(std::memory_order_acquire) == rehash_req ) + { + typename bucket::scoped_type lock; + if( lock.try_acquire( b->mutex, /*write=*/true ) ) { + if( b->node_list.load(std::memory_order_relaxed) == rehash_req) + const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing + } + else lock.acquire( b->mutex, /*write=*/false ); + __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req,nullptr); + } + n = search_bucket( key, b ); + if( n ) + return n->storage(); + else if( this->check_mask_race( h, m ) ) + goto restart; + return 0; + } +}; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename HashCompare = tbb_hash_compare<iterator_key_t<It>>, + typename Alloc = tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<HashCompare>>> +concurrent_hash_map( It, It, HashCompare = HashCompare(), Alloc = Alloc() ) +-> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, HashCompare, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_hash_map( It, It, Alloc ) +-> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, tbb_hash_compare<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, + typename HashCompare = tbb_hash_compare<std::remove_const_t<Key>>, + typename Alloc = tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<HashCompare>>> +concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, HashCompare = HashCompare(), Alloc = Alloc() ) +-> concurrent_hash_map<std::remove_const_t<Key>, T, HashCompare, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_hash_map<std::remove_const_t<Key>, T, tbb_hash_compare<std::remove_const_t<Key>>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +template <typename Key, typename T, typename HashCompare, typename A1, typename A2> +inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) { + if(a.size() != b.size()) return false; + typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end()); + typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end()); + for(; i != i_end; ++i) { + j = b.equal_range(i->first).first; + if( j == j_end || !(i->second == j->second) ) return false; + } + return true; +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Key, typename T, typename HashCompare, typename A1, typename A2> +inline bool operator!=(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) +{ return !(a == b); } +#endif // !__TBB_CPP20_COMPARISONS_PRESENT 
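// Note that operator== above compares element sets, not representation: the two maps
// may use different allocators (A1 and A2) and differently rehashed buckets, yet they
// compare equal as long as every key maps to an equal value. A minimal sketch of how
// this is typically used (illustrative only; assumes <oneapi/tbb/concurrent_hash_map.h>
// and <cassert>, and that no other thread mutates either map during the comparison):
//
//     using map_t = tbb::concurrent_hash_map<std::string, int>;
//     map_t live;
//     {
//         map_t::accessor a;
//         live.insert(a, "answer");
//         a->second = 42;
//     }
//     map_t snapshot(live);        // copy construction performs a deep element copy
//     assert(snapshot == live);    // element-wise comparison defined above
//     snapshot.clear();
//     assert(snapshot != live);    // unequal after clearing the copy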
+ +template <typename Key, typename T, typename HashCompare, typename A> +inline void swap(concurrent_hash_map<Key, T, HashCompare, A> &a, concurrent_hash_map<Key, T, HashCompare, A> &b) +{ a.swap( b ); } + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::split; + using detail::d1::concurrent_hash_map; + using detail::d1::tbb_hash_compare; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_concurrent_hash_map_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h index b83dd5f8c1..d6d0a6d6a4 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h @@ -1,364 +1,364 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_concurrent_lru_cache_H -#define __TBB_concurrent_lru_cache_H - -#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE - #error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h -#endif - -#include "detail/_assert.h" -#include "detail/_aggregator.h" - -#include <map> // for std::map -#include <list> // for std::list -#include <utility> // for std::make_pair -#include <algorithm> // for std::find -#include <atomic> // for std::atomic<bool> - -namespace tbb { - -namespace detail { -namespace d1 { - -//----------------------------------------------------------------------------- -// Concurrent LRU cache -//----------------------------------------------------------------------------- - -template<typename KeyT, typename ValT, typename KeyToValFunctorT = ValT (*) (KeyT)> -class concurrent_lru_cache : no_assign { -// incapsulated helper classes -private: - struct handle_object; - struct storage_map_value_type; - - struct aggregator_operation; - struct retrieve_aggregator_operation; - struct signal_end_of_usage_aggregator_operation; - -// typedefs -public: - using key_type = KeyT; - using value_type = ValT; - using pointer = ValT*; - using reference = ValT&; - using const_pointer = const ValT*; - using const_reference = const ValT&; - - using value_function_type = KeyToValFunctorT; - using handle = handle_object; -private: - using lru_cache_type = concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>; - - using storage_map_type = std::map<key_type, storage_map_value_type>; - using storage_map_iterator_type = typename storage_map_type::iterator; - using storage_map_pointer_type = typename storage_map_type::pointer; - using storage_map_reference_type = typename storage_map_type::reference; - - using history_list_type = std::list<storage_map_iterator_type>; - using history_list_iterator_type = typename history_list_type::iterator; - - using aggregator_operation_type = aggregator_operation; - using aggregator_function_type = aggregating_functor<lru_cache_type, aggregator_operation_type>; - using aggregator_type = aggregator<aggregator_function_type, aggregator_operation_type>; - - friend class 
aggregating_functor<lru_cache_type,aggregator_operation_type>; - -// fields -private: - value_function_type my_value_function; - aggregator_type my_aggregator; - - storage_map_type my_storage_map; // storage map for used objects - history_list_type my_history_list; // history list for unused objects - const std::size_t my_history_list_capacity; // history list's allowed capacity - -// interface -public: - - concurrent_lru_cache(value_function_type value_function, std::size_t cache_capacity) - : my_value_function(value_function), my_history_list_capacity(cache_capacity) { - my_aggregator.initialize_handler(aggregator_function_type(this)); - } - - handle operator[](key_type key) { - retrieve_aggregator_operation op(key); - my_aggregator.execute(&op); - - if (op.is_new_value_needed()) { - op.result().second.my_value = my_value_function(key); - op.result().second.my_is_ready.store(true, std::memory_order_release); - } else { - spin_wait_while_eq(op.result().second.my_is_ready, false); - } - - return handle(*this, op.result()); - } - -private: - - void handle_operations(aggregator_operation* op_list) { - while (op_list) { - op_list->cast_and_handle(*this); - aggregator_operation* prev_op = op_list; - op_list = op_list->next; - - (prev_op->status).store(1, std::memory_order_release); - } - } - - void signal_end_of_usage(storage_map_reference_type map_record_ref) { - signal_end_of_usage_aggregator_operation op(map_record_ref); - my_aggregator.execute(&op); - } - - void signal_end_of_usage_serial(storage_map_reference_type map_record_ref) { - storage_map_iterator_type map_it = my_storage_map.find(map_record_ref.first); - - __TBB_ASSERT(map_it != my_storage_map.end(), - "cache should not return past-end iterators to outer world"); - __TBB_ASSERT(&(*map_it) == &map_record_ref, - "dangling reference has been returned to outside world: data race?"); - __TBB_ASSERT(std::find(my_history_list.begin(), my_history_list.end(), map_it) == my_history_list.end(), - "object in use should not be in list of unused objects "); - - // if it was the last reference, put it to the LRU history - if (! --(map_it->second.my_ref_counter)) { - // if the LRU history is full, evict the oldest items to get space - if (my_history_list.size() >= my_history_list_capacity) { - std::size_t number_of_elements_to_evict = 1 + my_history_list.size() - my_history_list_capacity; - - for (std::size_t i = 0; i < number_of_elements_to_evict; ++i) { - storage_map_iterator_type map_it_to_evict = my_history_list.back(); - - __TBB_ASSERT(map_it_to_evict->second.my_ref_counter == 0, - "item to be evicted should not have a live references"); - - // TODO: can we use forward_list instead of list? pop_front / insert_after last - my_history_list.pop_back(); - my_storage_map.erase(map_it_to_evict); - } - } - - // TODO: can we use forward_list instead of list? 
pop_front / insert_after last - my_history_list.push_front(map_it); - map_it->second.my_history_list_iterator = my_history_list.begin(); - } - } - - storage_map_reference_type retrieve_serial(key_type key, bool& is_new_value_needed) { - storage_map_iterator_type map_it = my_storage_map.find(key); - - if (map_it == my_storage_map.end()) { - map_it = my_storage_map.emplace_hint( - map_it, std::piecewise_construct, std::make_tuple(key), std::make_tuple(value_type(), 0, my_history_list.end(), false)); - is_new_value_needed = true; - } else { - history_list_iterator_type list_it = map_it->second.my_history_list_iterator; - if (list_it != my_history_list.end()) { - __TBB_ASSERT(map_it->second.my_ref_counter == 0, - "item to be evicted should not have a live references"); - - // Item is going to be used. Therefore it is not a subject for eviction, - // so we remove it from LRU history. - my_history_list.erase(list_it); - map_it->second.my_history_list_iterator = my_history_list.end(); - } - } - - ++(map_it->second.my_ref_counter); - return *map_it; - } -}; - -//----------------------------------------------------------------------------- -// Value type for storage map in concurrent LRU cache -//----------------------------------------------------------------------------- - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::storage_map_value_type { -//typedefs -public: - using ref_counter_type = std::size_t; - -// fields -public: - value_type my_value; - ref_counter_type my_ref_counter; - history_list_iterator_type my_history_list_iterator; - std::atomic<bool> my_is_ready; - -// interface -public: - storage_map_value_type( - value_type const& value, ref_counter_type ref_counter, - history_list_iterator_type history_list_iterator, bool is_ready) - : my_value(value), my_ref_counter(ref_counter), - my_history_list_iterator(history_list_iterator), my_is_ready(is_ready) {} -}; - -//----------------------------------------------------------------------------- -// Handle object for operator[] in concurrent LRU cache -//----------------------------------------------------------------------------- - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::handle_object { -// fields -private: - lru_cache_type* my_lru_cache_ptr; - storage_map_pointer_type my_map_record_ptr; - -// interface -public: - handle_object() - : my_lru_cache_ptr(nullptr), my_map_record_ptr(nullptr) {} - handle_object(lru_cache_type& lru_cache_ref, storage_map_reference_type map_record_ref) - : my_lru_cache_ptr(&lru_cache_ref), my_map_record_ptr(&map_record_ref) {} - - handle_object(handle_object&) = delete; - void operator=(handle_object&) = delete; - - handle_object(handle_object&& other) - : my_lru_cache_ptr(other.my_lru_cache_ptr), my_map_record_ptr(other.my_map_record_ptr) { - - __TBB_ASSERT( - bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), - "invalid state of moving object?"); - - other.my_lru_cache_ptr = nullptr; - other.my_map_record_ptr = nullptr; - } - - handle_object& operator=(handle_object&& other) { - __TBB_ASSERT( - bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), - "invalid state of moving object?"); - - if (my_lru_cache_ptr) - my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); - - my_lru_cache_ptr = other.my_lru_cache_ptr; - my_map_record_ptr = other.my_map_record_ptr; - other.my_lru_cache_ptr = nullptr; - other.my_map_record_ptr = 
nullptr; - - return *this; - } - - ~handle_object() { - if (my_lru_cache_ptr) - my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); - } - - operator bool() const { - return (my_lru_cache_ptr && my_map_record_ptr); - } - - value_type& value() { - __TBB_ASSERT(my_lru_cache_ptr, "get value from already moved object?"); - __TBB_ASSERT(my_map_record_ptr, "get value from an invalid or already moved object?"); - - return my_map_record_ptr->second.my_value; - } -}; - -//----------------------------------------------------------------------------- -// Aggregator operation for aggregator type in concurrent LRU cache -//----------------------------------------------------------------------------- - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::aggregator_operation - : aggregated_operation<aggregator_operation> { -// incapsulated helper classes -public: - enum class op_type { retrieve, signal_end_of_usage }; - -// fields -private: - op_type my_op; - -// interface -public: - aggregator_operation(op_type op) : my_op(op) {} - - // TODO: aggregator_operation can be implemented - // - as a statically typed variant type or CRTP? (static, dependent on the use case) - // - or use pointer to function and apply_visitor (dynamic) - // - or use virtual functions (dynamic) - void cast_and_handle(lru_cache_type& lru_cache_ref) { - if (my_op == op_type::retrieve) - static_cast<retrieve_aggregator_operation*>(this)->handle(lru_cache_ref); - else - static_cast<signal_end_of_usage_aggregator_operation*>(this)->handle(lru_cache_ref); - } -}; - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::retrieve_aggregator_operation - : aggregator_operation, private no_assign { -public: - key_type my_key; - storage_map_pointer_type my_map_record_ptr; - bool my_is_new_value_needed; - -public: - retrieve_aggregator_operation(key_type key) - : aggregator_operation(aggregator_operation::op_type::retrieve), - my_key(key), my_is_new_value_needed(false) {} - - void handle(lru_cache_type& lru_cache_ref) { - my_map_record_ptr = &lru_cache_ref.retrieve_serial(my_key, my_is_new_value_needed); - } - - storage_map_reference_type result() { return *my_map_record_ptr; } - - bool is_new_value_needed() { return my_is_new_value_needed; } -}; - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::signal_end_of_usage_aggregator_operation - : aggregator_operation, private no_assign { - -private: - storage_map_reference_type my_map_record_ref; - -public: - signal_end_of_usage_aggregator_operation(storage_map_reference_type map_record_ref) - : aggregator_operation(aggregator_operation::op_type::signal_end_of_usage), - my_map_record_ref(map_record_ref) {} - - void handle(lru_cache_type& lru_cache_ref) { - lru_cache_ref.signal_end_of_usage_serial(my_map_record_ref); - } -}; - -// TODO: if we have guarantees that KeyToValFunctorT always have -// ValT as a return type and KeyT as an argument type -// we can deduce template parameters of concurrent_lru_cache -// by pattern matching on KeyToValFunctorT - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_lru_cache; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_lru_cache_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may 
not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_lru_cache_H +#define __TBB_concurrent_lru_cache_H + +#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE + #error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h +#endif + +#include "detail/_assert.h" +#include "detail/_aggregator.h" + +#include <map> // for std::map +#include <list> // for std::list +#include <utility> // for std::make_pair +#include <algorithm> // for std::find +#include <atomic> // for std::atomic<bool> + +namespace tbb { + +namespace detail { +namespace d1 { + +//----------------------------------------------------------------------------- +// Concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT = ValT (*) (KeyT)> +class concurrent_lru_cache : no_assign { +// incapsulated helper classes +private: + struct handle_object; + struct storage_map_value_type; + + struct aggregator_operation; + struct retrieve_aggregator_operation; + struct signal_end_of_usage_aggregator_operation; + +// typedefs +public: + using key_type = KeyT; + using value_type = ValT; + using pointer = ValT*; + using reference = ValT&; + using const_pointer = const ValT*; + using const_reference = const ValT&; + + using value_function_type = KeyToValFunctorT; + using handle = handle_object; +private: + using lru_cache_type = concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>; + + using storage_map_type = std::map<key_type, storage_map_value_type>; + using storage_map_iterator_type = typename storage_map_type::iterator; + using storage_map_pointer_type = typename storage_map_type::pointer; + using storage_map_reference_type = typename storage_map_type::reference; + + using history_list_type = std::list<storage_map_iterator_type>; + using history_list_iterator_type = typename history_list_type::iterator; + + using aggregator_operation_type = aggregator_operation; + using aggregator_function_type = aggregating_functor<lru_cache_type, aggregator_operation_type>; + using aggregator_type = aggregator<aggregator_function_type, aggregator_operation_type>; + + friend class aggregating_functor<lru_cache_type,aggregator_operation_type>; + +// fields +private: + value_function_type my_value_function; + aggregator_type my_aggregator; + + storage_map_type my_storage_map; // storage map for used objects + history_list_type my_history_list; // history list for unused objects + const std::size_t my_history_list_capacity; // history list's allowed capacity + +// interface +public: + + concurrent_lru_cache(value_function_type value_function, std::size_t cache_capacity) + : my_value_function(value_function), my_history_list_capacity(cache_capacity) { + my_aggregator.initialize_handler(aggregator_function_type(this)); + } + + handle operator[](key_type key) { + retrieve_aggregator_operation op(key); + my_aggregator.execute(&op); + + if (op.is_new_value_needed()) { + op.result().second.my_value = my_value_function(key); + op.result().second.my_is_ready.store(true, std::memory_order_release); + } else { + 
spin_wait_while_eq(op.result().second.my_is_ready, false); + } + + return handle(*this, op.result()); + } + +private: + + void handle_operations(aggregator_operation* op_list) { + while (op_list) { + op_list->cast_and_handle(*this); + aggregator_operation* prev_op = op_list; + op_list = op_list->next; + + (prev_op->status).store(1, std::memory_order_release); + } + } + + void signal_end_of_usage(storage_map_reference_type map_record_ref) { + signal_end_of_usage_aggregator_operation op(map_record_ref); + my_aggregator.execute(&op); + } + + void signal_end_of_usage_serial(storage_map_reference_type map_record_ref) { + storage_map_iterator_type map_it = my_storage_map.find(map_record_ref.first); + + __TBB_ASSERT(map_it != my_storage_map.end(), + "cache should not return past-end iterators to outer world"); + __TBB_ASSERT(&(*map_it) == &map_record_ref, + "dangling reference has been returned to outside world: data race?"); + __TBB_ASSERT(std::find(my_history_list.begin(), my_history_list.end(), map_it) == my_history_list.end(), + "object in use should not be in list of unused objects "); + + // if it was the last reference, put it to the LRU history + if (! --(map_it->second.my_ref_counter)) { + // if the LRU history is full, evict the oldest items to get space + if (my_history_list.size() >= my_history_list_capacity) { + std::size_t number_of_elements_to_evict = 1 + my_history_list.size() - my_history_list_capacity; + + for (std::size_t i = 0; i < number_of_elements_to_evict; ++i) { + storage_map_iterator_type map_it_to_evict = my_history_list.back(); + + __TBB_ASSERT(map_it_to_evict->second.my_ref_counter == 0, + "item to be evicted should not have a live references"); + + // TODO: can we use forward_list instead of list? pop_front / insert_after last + my_history_list.pop_back(); + my_storage_map.erase(map_it_to_evict); + } + } + + // TODO: can we use forward_list instead of list? pop_front / insert_after last + my_history_list.push_front(map_it); + map_it->second.my_history_list_iterator = my_history_list.begin(); + } + } + + storage_map_reference_type retrieve_serial(key_type key, bool& is_new_value_needed) { + storage_map_iterator_type map_it = my_storage_map.find(key); + + if (map_it == my_storage_map.end()) { + map_it = my_storage_map.emplace_hint( + map_it, std::piecewise_construct, std::make_tuple(key), std::make_tuple(value_type(), 0, my_history_list.end(), false)); + is_new_value_needed = true; + } else { + history_list_iterator_type list_it = map_it->second.my_history_list_iterator; + if (list_it != my_history_list.end()) { + __TBB_ASSERT(map_it->second.my_ref_counter == 0, + "item to be evicted should not have a live references"); + + // Item is going to be used. Therefore it is not a subject for eviction, + // so we remove it from LRU history. 
+ my_history_list.erase(list_it); + map_it->second.my_history_list_iterator = my_history_list.end(); + } + } + + ++(map_it->second.my_ref_counter); + return *map_it; + } +}; + +//----------------------------------------------------------------------------- +// Value type for storage map in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::storage_map_value_type { +//typedefs +public: + using ref_counter_type = std::size_t; + +// fields +public: + value_type my_value; + ref_counter_type my_ref_counter; + history_list_iterator_type my_history_list_iterator; + std::atomic<bool> my_is_ready; + +// interface +public: + storage_map_value_type( + value_type const& value, ref_counter_type ref_counter, + history_list_iterator_type history_list_iterator, bool is_ready) + : my_value(value), my_ref_counter(ref_counter), + my_history_list_iterator(history_list_iterator), my_is_ready(is_ready) {} +}; + +//----------------------------------------------------------------------------- +// Handle object for operator[] in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::handle_object { +// fields +private: + lru_cache_type* my_lru_cache_ptr; + storage_map_pointer_type my_map_record_ptr; + +// interface +public: + handle_object() + : my_lru_cache_ptr(nullptr), my_map_record_ptr(nullptr) {} + handle_object(lru_cache_type& lru_cache_ref, storage_map_reference_type map_record_ref) + : my_lru_cache_ptr(&lru_cache_ref), my_map_record_ptr(&map_record_ref) {} + + handle_object(handle_object&) = delete; + void operator=(handle_object&) = delete; + + handle_object(handle_object&& other) + : my_lru_cache_ptr(other.my_lru_cache_ptr), my_map_record_ptr(other.my_map_record_ptr) { + + __TBB_ASSERT( + bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), + "invalid state of moving object?"); + + other.my_lru_cache_ptr = nullptr; + other.my_map_record_ptr = nullptr; + } + + handle_object& operator=(handle_object&& other) { + __TBB_ASSERT( + bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), + "invalid state of moving object?"); + + if (my_lru_cache_ptr) + my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); + + my_lru_cache_ptr = other.my_lru_cache_ptr; + my_map_record_ptr = other.my_map_record_ptr; + other.my_lru_cache_ptr = nullptr; + other.my_map_record_ptr = nullptr; + + return *this; + } + + ~handle_object() { + if (my_lru_cache_ptr) + my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); + } + + operator bool() const { + return (my_lru_cache_ptr && my_map_record_ptr); + } + + value_type& value() { + __TBB_ASSERT(my_lru_cache_ptr, "get value from already moved object?"); + __TBB_ASSERT(my_map_record_ptr, "get value from an invalid or already moved object?"); + + return my_map_record_ptr->second.my_value; + } +}; + +//----------------------------------------------------------------------------- +// Aggregator operation for aggregator type in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::aggregator_operation + : 
aggregated_operation<aggregator_operation> { +// incapsulated helper classes +public: + enum class op_type { retrieve, signal_end_of_usage }; + +// fields +private: + op_type my_op; + +// interface +public: + aggregator_operation(op_type op) : my_op(op) {} + + // TODO: aggregator_operation can be implemented + // - as a statically typed variant type or CRTP? (static, dependent on the use case) + // - or use pointer to function and apply_visitor (dynamic) + // - or use virtual functions (dynamic) + void cast_and_handle(lru_cache_type& lru_cache_ref) { + if (my_op == op_type::retrieve) + static_cast<retrieve_aggregator_operation*>(this)->handle(lru_cache_ref); + else + static_cast<signal_end_of_usage_aggregator_operation*>(this)->handle(lru_cache_ref); + } +}; + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::retrieve_aggregator_operation + : aggregator_operation, private no_assign { +public: + key_type my_key; + storage_map_pointer_type my_map_record_ptr; + bool my_is_new_value_needed; + +public: + retrieve_aggregator_operation(key_type key) + : aggregator_operation(aggregator_operation::op_type::retrieve), + my_key(key), my_is_new_value_needed(false) {} + + void handle(lru_cache_type& lru_cache_ref) { + my_map_record_ptr = &lru_cache_ref.retrieve_serial(my_key, my_is_new_value_needed); + } + + storage_map_reference_type result() { return *my_map_record_ptr; } + + bool is_new_value_needed() { return my_is_new_value_needed; } +}; + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::signal_end_of_usage_aggregator_operation + : aggregator_operation, private no_assign { + +private: + storage_map_reference_type my_map_record_ref; + +public: + signal_end_of_usage_aggregator_operation(storage_map_reference_type map_record_ref) + : aggregator_operation(aggregator_operation::op_type::signal_end_of_usage), + my_map_record_ref(map_record_ref) {} + + void handle(lru_cache_type& lru_cache_ref) { + lru_cache_ref.signal_end_of_usage_serial(my_map_record_ref); + } +}; + +// TODO: if we have guarantees that KeyToValFunctorT always have +// ValT as a return type and KeyT as an argument type +// we can deduce template parameters of concurrent_lru_cache +// by pattern matching on KeyToValFunctorT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_lru_cache; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_lru_cache_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h index ae389d4f42..3a3ec3e309 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h @@ -1,342 +1,342 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_concurrent_map_H -#define __TBB_concurrent_map_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_skip_list.h" -#include "tbb_allocator.h" -#include <functional> -#include <tuple> -#include <utility> - -namespace tbb { -namespace detail { -namespace d1 { - -template<typename Key, typename Value, typename KeyCompare, typename RandomGenerator, - typename Allocator, bool AllowMultimapping> -struct map_traits { - static constexpr std::size_t max_level = RandomGenerator::max_level; - using random_level_generator_type = RandomGenerator; - using key_type = Key; - using mapped_type = Value; - using compare_type = KeyCompare; - using value_type = std::pair<const key_type, mapped_type>; - using reference = value_type&; - using const_reference = const value_type&; - using allocator_type = Allocator; - - static constexpr bool allow_multimapping = AllowMultimapping; - - class value_compare { - public: - bool operator()(const value_type& lhs, const value_type& rhs) const { - return comp(lhs.first, rhs.first); - } - - protected: - value_compare(compare_type c) : comp(c) {} - - friend struct map_traits; - - compare_type comp; - }; - - static value_compare value_comp(compare_type comp) { return value_compare(comp); } - - static const key_type& get_key(const_reference val) { - return val.first; - } -}; // struct map_traits - -template <typename Key, typename Value, typename Compare, typename Allocator> -class concurrent_multimap; - -template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, Value>>> -class concurrent_map : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { - using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; -public: - using key_type = Key; - using mapped_type = Value; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using key_compare = Compare; - using value_compare = typename base_type::value_compare; - using allocator_type = Allocator; - - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - - using node_type = typename base_type::node_type; - - // Include constructors of base type - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_map() = default; - concurrent_map( const concurrent_map& ) = default; - concurrent_map( const concurrent_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_map( concurrent_map&& ) = default; - concurrent_map( concurrent_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_map& operator=( const concurrent_map& ) = default; - concurrent_map& operator=( concurrent_map&& ) = default; - - // Observers - mapped_type& at(const key_type& key) { - iterator it = this->find(key); - - if (it == this->end()) { - throw_exception(exception_id::invalid_key); - } - return it->second; - } - - const mapped_type& at(const key_type& key) 
const { - return const_cast<concurrent_map*>(this)->at(key); - } - - mapped_type& operator[](const key_type& key) { - iterator it = this->find(key); - - if (it == this->end()) { - it = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; - } - return it->second; - } - - mapped_type& operator[](key_type&& key) { - iterator it = this->find(key); - - if (it == this->end()) { - it = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; - } - return it->second; - } - - using base_type::insert; - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - std::pair<iterator, bool>>::type insert( P&& value ) - { - return this->emplace(std::forward<P>(value)); - } - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - iterator>::type insert( const_iterator hint, P&& value ) - { - return this->emplace_hint(hint, std::forward<P>(value)); - } - - template<typename OtherCompare> - void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } - - template<typename OtherCompare> - void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_map - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Comp = std::less<iterator_key_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_map( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; - -template <typename Key, typename T, - typename Comp = std::less<std::remove_const_t<Key>>, - typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_map( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_map<std::remove_const_t<Key>, T, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_map( It, It, Alloc ) --> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, - std::less<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_map( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_map<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Value, typename Compare, typename Allocator> -void swap( concurrent_map<Key, Value, Compare, Allocator>& lhs, - concurrent_map<Key, Value, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = 
tbb::tbb_allocator<std::pair<const Key, Value>>> -class concurrent_multimap : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { - using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; -public: - using key_type = Key; - using mapped_type = Value; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using key_compare = Compare; - using value_compare = typename base_type::value_compare; - using allocator_type = Allocator; - - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - - using node_type = typename base_type::node_type; - - // Include constructors of base_type - using base_type::base_type; - using base_type::insert; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_multimap() = default; - concurrent_multimap( const concurrent_multimap& ) = default; - concurrent_multimap( const concurrent_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_multimap( concurrent_multimap&& ) = default; - concurrent_multimap( concurrent_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_multimap& operator=( const concurrent_multimap& ) = default; - concurrent_multimap& operator=( concurrent_multimap&& ) = default; - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - std::pair<iterator, bool>>::type insert( P&& value ) - { - return this->emplace(std::forward<P>(value)); - } - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - iterator>::type insert( const_iterator hint, P&& value ) - { - return this->emplace_hint(hint, std::forward<P>(value)); - } - - template<typename OtherCompare> - void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } - - template<typename OtherCompare> - void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_multimap - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Comp = std::less<iterator_key_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_multimap( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; - -template <typename Key, typename T, - typename Comp = std::less<std::remove_const_t<Key>>, - 
typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_multimap<std::remove_const_t<Key>, T, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_multimap( It, It, Alloc ) --> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, - std::less<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_multimap<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; - - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Value, typename Compare, typename Allocator> -void swap( concurrent_multimap<Key, Value, Compare, Allocator>& lhs, - concurrent_multimap<Key, Value, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_map; -using detail::d1::concurrent_multimap; -using detail::split; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_map_H +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_map_H +#define __TBB_concurrent_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_skip_list.h" +#include "tbb_allocator.h" +#include <functional> +#include <tuple> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +template<typename Key, typename Value, typename KeyCompare, typename RandomGenerator, + typename Allocator, bool AllowMultimapping> +struct map_traits { + static constexpr std::size_t max_level = RandomGenerator::max_level; + using random_level_generator_type = RandomGenerator; + using key_type = Key; + using mapped_type = Value; + using compare_type = KeyCompare; + using value_type = std::pair<const key_type, mapped_type>; + using reference = value_type&; + using const_reference = const value_type&; + using allocator_type = Allocator; + + static constexpr bool allow_multimapping = AllowMultimapping; + + class value_compare { + public: + bool operator()(const value_type& lhs, const value_type& rhs) const { + return comp(lhs.first, rhs.first); + } + + protected: + value_compare(compare_type c) : comp(c) {} + + friend struct map_traits; + + compare_type comp; + }; + + static value_compare value_comp(compare_type comp) { return value_compare(comp); } + + static const key_type& get_key(const_reference val) { + return val.first; + } +}; // struct map_traits + +template <typename Key, typename Value, typename Compare, typename Allocator> +class concurrent_multimap; + +template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, Value>>> +class concurrent_map : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { + using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; +public: + using key_type = Key; + using mapped_type = Value; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_map() = default; + concurrent_map( const concurrent_map& ) = default; + concurrent_map( const concurrent_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_map( concurrent_map&& ) = default; + concurrent_map( concurrent_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_map& operator=( const concurrent_map& ) = default; + concurrent_map& operator=( concurrent_map&& ) = default; + + // Observers + mapped_type& at(const key_type& key) { + iterator it = this->find(key); + + if (it == this->end()) { + throw_exception(exception_id::invalid_key); + } + return it->second; + } + + const mapped_type& at(const key_type& key) 
const { + return const_cast<concurrent_map*>(this)->at(key); + } + + mapped_type& operator[](const key_type& key) { + iterator it = this->find(key); + + if (it == this->end()) { + it = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; + } + return it->second; + } + + mapped_type& operator[](key_type&& key) { + iterator it = this->find(key); + + if (it == this->end()) { + it = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; + } + return it->second; + } + + using base_type::insert; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) + { + return this->emplace(std::forward<P>(value)); + } + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) + { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_map + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_map( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; + +template <typename Key, typename T, + typename Comp = std::less<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_map( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_map<std::remove_const_t<Key>, T, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_map( It, It, Alloc ) +-> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, + std::less<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_map<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Value, typename Compare, typename Allocator> +void swap( concurrent_map<Key, Value, Compare, Allocator>& lhs, + concurrent_map<Key, Value, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = 
tbb::tbb_allocator<std::pair<const Key, Value>>> +class concurrent_multimap : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { + using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; +public: + using key_type = Key; + using mapped_type = Value; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type + using base_type::base_type; + using base_type::insert; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_multimap() = default; + concurrent_multimap( const concurrent_multimap& ) = default; + concurrent_multimap( const concurrent_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_multimap( concurrent_multimap&& ) = default; + concurrent_multimap( concurrent_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_multimap& operator=( const concurrent_multimap& ) = default; + concurrent_multimap& operator=( concurrent_multimap&& ) = default; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) + { + return this->emplace(std::forward<P>(value)); + } + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) + { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_multimap + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multimap( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; + +template <typename Key, typename T, + typename Comp = std::less<std::remove_const_t<Key>>, + 
typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multimap<std::remove_const_t<Key>, T, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multimap( It, It, Alloc ) +-> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, + std::less<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_multimap<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; + + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Value, typename Compare, typename Allocator> +void swap( concurrent_multimap<Key, Value, Compare, Allocator>& lhs, + concurrent_multimap<Key, Value, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_map; +using detail::d1::concurrent_multimap; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_map_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h index a281740ad8..0147510af6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h @@ -1,490 +1,490 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_concurrent_priority_queue_H -#define __TBB_concurrent_priority_queue_H - -#include "detail/_namespace_injection.h" -#include "detail/_aggregator.h" -#include "detail/_template_helpers.h" -#include "detail/_allocator_traits.h" -#include "detail/_range_common.h" -#include "detail/_exception.h" -#include "detail/_utils.h" -#include "detail/_containers_helpers.h" -#include "cache_aligned_allocator.h" -#include <vector> -#include <iterator> -#include <functional> -#include <utility> -#include <initializer_list> -#include <type_traits> - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename T, typename Compare = std::less<T>, typename Allocator = cache_aligned_allocator<T>> -class concurrent_priority_queue { -public: - using value_type = T; - using reference = T&; - using const_reference = const T&; - - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - - concurrent_priority_queue() : concurrent_priority_queue(allocator_type{}) {} - - explicit concurrent_priority_queue( const allocator_type& alloc ) - : mark(0), my_size(0), my_compare(), data(alloc) - { - my_aggregator.initialize_handler(functor{this}); - } - - explicit concurrent_priority_queue( const Compare& compare, const allocator_type& alloc = allocator_type() ) - : mark(0), my_size(0), my_compare(compare), data(alloc) - { - my_aggregator.initialize_handler(functor{this}); - } - - explicit concurrent_priority_queue( size_type init_capacity, const allocator_type& alloc = allocator_type() ) - : mark(0), my_size(0), my_compare(), data(alloc) - { - data.reserve(init_capacity); - my_aggregator.initialize_handler(functor{this}); - } - - explicit concurrent_priority_queue( size_type init_capacity, const Compare& compare, const allocator_type& alloc = allocator_type() ) - : mark(0), my_size(0), my_compare(compare), data(alloc) - { - data.reserve(init_capacity); - my_aggregator.initialize_handler(functor{this}); - } - - template <typename InputIterator> - concurrent_priority_queue( InputIterator begin, InputIterator end, const Compare& compare, const allocator_type& alloc = allocator_type() ) - : mark(0), my_compare(compare), data(begin, end, alloc) - { - my_aggregator.initialize_handler(functor{this}); - heapify(); - my_size.store(data.size(), std::memory_order_relaxed); - } - - template <typename InputIterator> - concurrent_priority_queue( InputIterator begin, InputIterator end, const allocator_type& alloc = allocator_type() ) - : concurrent_priority_queue(begin, end, Compare(), alloc) {} - - concurrent_priority_queue( std::initializer_list<value_type> init, const Compare& compare, const allocator_type& alloc = allocator_type() ) - : concurrent_priority_queue(init.begin(), init.end(), compare, alloc) {} - - concurrent_priority_queue( std::initializer_list<value_type> init, const allocator_type& alloc = allocator_type() ) - : concurrent_priority_queue(init, Compare(), alloc) {} - - concurrent_priority_queue( const concurrent_priority_queue& other ) - : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), - data(other.data) - { - my_aggregator.initialize_handler(functor{this}); - } - - concurrent_priority_queue( const concurrent_priority_queue& other, const allocator_type& alloc ) - : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), - data(other.data, alloc) - { - my_aggregator.initialize_handler(functor{this}); - } - - 
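// A minimal usage sketch of this container (illustrative only; assumes
// <oneapi/tbb/concurrent_priority_queue.h> is included and the TBB runtime is linked):
//
//     tbb::concurrent_priority_queue<int> cpq;   // default std::less<int>: largest value first
//     cpq.push(3);
//     cpq.push(7);
//     cpq.emplace(5);
//     int top = 0;
//     while (cpq.try_pop(top)) {
//         // pops 7, then 5, then 3; safe to call concurrently with push/emplace
//     }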
concurrent_priority_queue( concurrent_priority_queue&& other ) - : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), - data(std::move(other.data)) - { - my_aggregator.initialize_handler(functor{this}); - } - - concurrent_priority_queue( concurrent_priority_queue&& other, const allocator_type& alloc ) - : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), - data(std::move(other.data), alloc) - { - my_aggregator.initialize_handler(functor{this}); - } - - concurrent_priority_queue& operator=( const concurrent_priority_queue& other ) { - if (this != &other) { - data = other.data; - mark = other.mark; - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - } - return *this; - } - - concurrent_priority_queue& operator=( concurrent_priority_queue&& other ) { - if (this != &other) { - // TODO: check if exceptions from std::vector::operator=(vector&&) should be handled separately - data = std::move(other.data); - mark = other.mark; - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - } - return *this; - } - - concurrent_priority_queue& operator=( std::initializer_list<value_type> init ) { - assign(init.begin(), init.end()); - return *this; - } - - template <typename InputIterator> - void assign( InputIterator begin, InputIterator end ) { - data.assign(begin, end); - mark = 0; - my_size.store(data.size(), std::memory_order_relaxed); - heapify(); - } - - void assign( std::initializer_list<value_type> init ) { - assign(init.begin(), init.end()); - } - - /* Returned value may not reflect results of pending operations. - This operation reads shared data and will trigger a race condition. */ - __TBB_nodiscard bool empty() const { return size() == 0; } - - // Returns the current number of elements contained in the queue - /* Returned value may not reflect results of pending operations. - This operation reads shared data and will trigger a race condition. */ - size_type size() const { return my_size.load(std::memory_order_relaxed); } - - /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - void push( const value_type& value ) { - cpq_operation op_data(value, PUSH_OP); - my_aggregator.execute(&op_data); - if (op_data.status == FAILED) - throw_exception(exception_id::bad_alloc); - } - - /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - void push( value_type&& value ) { - cpq_operation op_data(value, PUSH_RVALUE_OP); - my_aggregator.execute(&op_data); - if (op_data.status == FAILED) - throw_exception(exception_id::bad_alloc); - } - - /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - template <typename... Args> - void emplace( Args&&... args ) { - // TODO: support uses allocator construction in this place - push(value_type(std::forward<Args>(args)...)); - } - - // Gets a reference to and removes highest priority element - /* If a highest priority element was found, sets elem and returns true, - otherwise returns false. - This operation can be safely used concurrently with other push, try_pop or emplace operations. 
*/ - bool try_pop( value_type& value ) { - cpq_operation op_data(value, POP_OP); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - // This operation affects the whole container => it is not thread-safe - void clear() { - data.clear(); - mark = 0; - my_size.store(0, std::memory_order_relaxed); - } - - // This operation affects the whole container => it is not thread-safe - void swap( concurrent_priority_queue& other ) { - if (this != &other) { - using std::swap; - swap(data, other.data); - swap(mark, other.mark); - - size_type sz = my_size.load(std::memory_order_relaxed); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_size.store(sz, std::memory_order_relaxed); - } - } - - allocator_type get_allocator() const { return data.get_allocator(); } -private: - enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP}; - enum operation_status {WAIT = 0, SUCCEEDED, FAILED}; - - class cpq_operation : public aggregated_operation<cpq_operation> { - public: - operation_type type; - union { - value_type* elem; - size_type sz; - }; - cpq_operation( const value_type& value, operation_type t ) - : type(t), elem(const_cast<value_type*>(&value)) {} - }; // class cpq_operation - - class functor { - concurrent_priority_queue* my_cpq; - public: - functor() : my_cpq(nullptr) {} - functor( concurrent_priority_queue* cpq ) : my_cpq(cpq) {} - - void operator()(cpq_operation* op_list) { - __TBB_ASSERT(my_cpq != nullptr, "Invalid functor"); - my_cpq->handle_operations(op_list); - } - }; // class functor - - void handle_operations( cpq_operation* op_list ) { - call_itt_notify(acquired, this); - cpq_operation* tmp, *pop_list = nullptr; - __TBB_ASSERT(mark == data.size(), NULL); - - // First pass processes all constant (amortized; reallocation may happen) time pushes and pops. - while(op_list) { - // ITT note: &(op_list->status) tag is used to cover accesses to op_list - // node. This thread is going to handle the operation, and so will acquire it - // and perform the associated operation w/o triggering a race condition; the - // thread that created the operation is waiting on the status field, so when - // this thread is done with the operation, it will perform a - // store_with_release to give control back to the waiting thread in - // aggregator::insert_operation. 
- // TODO: enable - call_itt_notify(acquired, &(op_list->status)); - __TBB_ASSERT(op_list->type != INVALID_OP, NULL); - - tmp = op_list; - op_list = op_list->next.load(std::memory_order_relaxed); - if (tmp->type == POP_OP) { - if (mark < data.size() && - my_compare(data[0], data.back())) - { - // there are newly pushed elems and the last one is higher than top - *(tmp->elem) = std::move(data.back()); - my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); - - data.pop_back(); - __TBB_ASSERT(mark <= data.size(), NULL); - } else { // no convenient item to pop; postpone - tmp->next.store(pop_list, std::memory_order_relaxed); - pop_list = tmp; - } - } else { // PUSH_OP or PUSH_RVALUE_OP - __TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation"); -#if TBB_USE_EXCEPTIONS - try -#endif - { - if (tmp->type == PUSH_OP) { - push_back_helper(*(tmp->elem)); - } else { - data.push_back(std::move(*(tmp->elem))); - } - my_size.store(my_size.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); - } -#if TBB_USE_EXCEPTIONS - catch(...) { - tmp->status.store(uintptr_t(FAILED), std::memory_order_release); - } -#endif - } - } - - // Second pass processes pop operations - while(pop_list) { - tmp = pop_list; - pop_list = pop_list->next.load(std::memory_order_relaxed); - __TBB_ASSERT(tmp->type == POP_OP, NULL); - if (data.empty()) { - tmp->status.store(uintptr_t(FAILED), std::memory_order_release); - } else { - __TBB_ASSERT(mark <= data.size(), NULL); - if (mark < data.size() && - my_compare(data[0], data.back())) - { - // there are newly pushed elems and the last one is higher than top - *(tmp->elem) = std::move(data.back()); - my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); - data.pop_back(); - } else { // extract top and push last element down heap - *(tmp->elem) = std::move(data[0]); - my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); - reheap(); - } - } - } - - // heapify any leftover pushed elements before doing the next - // batch of operations - if (mark < data.size()) heapify(); - __TBB_ASSERT(mark == data.size(), NULL); - call_itt_notify(releasing, this); - } - - // Merge unsorted elements into heap - void heapify() { - if (!mark && data.size() > 0) mark = 1; - for (; mark < data.size(); ++mark) { - // for each unheapified element under size - size_type cur_pos = mark; - value_type to_place = std::move(data[mark]); - do { // push to_place up the heap - size_type parent = (cur_pos - 1) >> 1; - if (!my_compare(data[parent], to_place)) - break; - data[cur_pos] = std::move(data[parent]); - cur_pos = parent; - } while(cur_pos); - data[cur_pos] = std::move(to_place); - } - } - - // Re-heapify after an extraction - // Re-heapify by pushing last element down the heap from the root. 
- void reheap() { - size_type cur_pos = 0, child = 1; - - while(child < mark) { - size_type target = child; - if (child + 1 < mark && my_compare(data[child], data[child + 1])) - ++target; - // target now has the higher priority child - if (my_compare(data[target], data.back())) - break; - data[cur_pos] = std::move(data[target]); - cur_pos = target; - child = (cur_pos << 1) + 1; - } - if (cur_pos != data.size() - 1) - data[cur_pos] = std::move(data.back()); - data.pop_back(); - if (mark > data.size()) mark = data.size(); - } - - void push_back_helper( const T& value ) { - push_back_helper_impl(value, std::is_copy_constructible<T>{}); - } - - void push_back_helper_impl( const T& value, /*is_copy_constructible = */std::true_type ) { - data.push_back(value); - } - - void push_back_helper_impl( const T&, /*is_copy_constructible = */std::false_type ) { - __TBB_ASSERT(false, "error: calling tbb::concurrent_priority_queue.push(const value_type&) for move-only type"); - } - - using aggregator_type = aggregator<functor, cpq_operation>; - - aggregator_type my_aggregator; - // Padding added to avoid false sharing - char padding1[max_nfs_size - sizeof(aggregator_type)]; - // The point at which unsorted elements begin - size_type mark; - std::atomic<size_type> my_size; - Compare my_compare; - - // Padding added to avoid false sharing - char padding2[max_nfs_size - (2*sizeof(size_type)) - sizeof(Compare)]; - //! Storage for the heap of elements in queue, plus unheapified elements - /** data has the following structure: - - binary unheapified - heap elements - ____|_______|____ - | | | - v v v - [_|...|_|_|...|_| |...| ] - 0 ^ ^ ^ - | | |__capacity - | |__my_size - |__mark - - Thus, data stores the binary heap starting at position 0 through - mark-1 (it may be empty). Then there are 0 or more elements - that have not yet been inserted into the heap, in positions - mark through my_size-1. 
*/ - - using vector_type = std::vector<value_type, allocator_type>; - vector_type data; - - friend bool operator==( const concurrent_priority_queue& lhs, - const concurrent_priority_queue& rhs ) - { - return lhs.data == rhs.data; - } - -#if !__TBB_CPP20_COMPARISONS_PRESENT - friend bool operator!=( const concurrent_priority_queue& lhs, - const concurrent_priority_queue& rhs ) - { - return !(lhs == rhs); - } -#endif -}; // class concurrent_priority_queue - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename It, - typename Comp = std::less<iterator_value_t<It>>, - typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_priority_queue( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_priority_queue<iterator_value_t<It>, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_priority_queue( It, It, Alloc ) --> concurrent_priority_queue<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; - -template <typename T, - typename Comp = std::less<T>, - typename Alloc = tbb::cache_aligned_allocator<T>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_priority_queue( std::initializer_list<T>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_priority_queue<T, Comp, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_priority_queue( std::initializer_list<T>, Alloc ) --> concurrent_priority_queue<T, std::less<T>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename T, typename Compare, typename Allocator> -void swap( concurrent_priority_queue<T, Compare, Allocator>& lhs, - concurrent_priority_queue<T, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail -inline namespace v1 { -using detail::d1::concurrent_priority_queue; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_priority_queue_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_priority_queue_H +#define __TBB_concurrent_priority_queue_H + +#include "detail/_namespace_injection.h" +#include "detail/_aggregator.h" +#include "detail/_template_helpers.h" +#include "detail/_allocator_traits.h" +#include "detail/_range_common.h" +#include "detail/_exception.h" +#include "detail/_utils.h" +#include "detail/_containers_helpers.h" +#include "cache_aligned_allocator.h" +#include <vector> +#include <iterator> +#include <functional> +#include <utility> +#include <initializer_list> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename T, typename Compare = std::less<T>, typename Allocator = cache_aligned_allocator<T>> +class concurrent_priority_queue { +public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + + concurrent_priority_queue() : concurrent_priority_queue(allocator_type{}) {} + + explicit concurrent_priority_queue( const allocator_type& alloc ) + : mark(0), my_size(0), my_compare(), data(alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(compare), data(alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( size_type init_capacity, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(), data(alloc) + { + data.reserve(init_capacity); + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( size_type init_capacity, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(compare), data(alloc) + { + data.reserve(init_capacity); + my_aggregator.initialize_handler(functor{this}); + } + + template <typename InputIterator> + concurrent_priority_queue( InputIterator begin, InputIterator end, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_compare(compare), data(begin, end, alloc) + { + my_aggregator.initialize_handler(functor{this}); + heapify(); + my_size.store(data.size(), std::memory_order_relaxed); + } + + template <typename InputIterator> + concurrent_priority_queue( InputIterator begin, InputIterator end, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(begin, end, Compare(), alloc) {} + + concurrent_priority_queue( std::initializer_list<value_type> init, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(init.begin(), init.end(), compare, alloc) {} + + concurrent_priority_queue( std::initializer_list<value_type> init, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(init, Compare(), alloc) {} + + concurrent_priority_queue( const concurrent_priority_queue& other ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(other.data) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue( const concurrent_priority_queue& other, const allocator_type& alloc ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(other.data, alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + 
concurrent_priority_queue( concurrent_priority_queue&& other ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(std::move(other.data)) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue( concurrent_priority_queue&& other, const allocator_type& alloc ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(std::move(other.data), alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue& operator=( const concurrent_priority_queue& other ) { + if (this != &other) { + data = other.data; + mark = other.mark; + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + } + return *this; + } + + concurrent_priority_queue& operator=( concurrent_priority_queue&& other ) { + if (this != &other) { + // TODO: check if exceptions from std::vector::operator=(vector&&) should be handled separately + data = std::move(other.data); + mark = other.mark; + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + } + return *this; + } + + concurrent_priority_queue& operator=( std::initializer_list<value_type> init ) { + assign(init.begin(), init.end()); + return *this; + } + + template <typename InputIterator> + void assign( InputIterator begin, InputIterator end ) { + data.assign(begin, end); + mark = 0; + my_size.store(data.size(), std::memory_order_relaxed); + heapify(); + } + + void assign( std::initializer_list<value_type> init ) { + assign(init.begin(), init.end()); + } + + /* Returned value may not reflect results of pending operations. + This operation reads shared data and will trigger a race condition. */ + __TBB_nodiscard bool empty() const { return size() == 0; } + + // Returns the current number of elements contained in the queue + /* Returned value may not reflect results of pending operations. + This operation reads shared data and will trigger a race condition. */ + size_type size() const { return my_size.load(std::memory_order_relaxed); } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + void push( const value_type& value ) { + cpq_operation op_data(value, PUSH_OP); + my_aggregator.execute(&op_data); + if (op_data.status == FAILED) + throw_exception(exception_id::bad_alloc); + } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + void push( value_type&& value ) { + cpq_operation op_data(value, PUSH_RVALUE_OP); + my_aggregator.execute(&op_data); + if (op_data.status == FAILED) + throw_exception(exception_id::bad_alloc); + } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + template <typename... Args> + void emplace( Args&&... args ) { + // TODO: support uses allocator construction in this place + push(value_type(std::forward<Args>(args)...)); + } + + // Gets a reference to and removes highest priority element + /* If a highest priority element was found, sets elem and returns true, + otherwise returns false. + This operation can be safely used concurrently with other push, try_pop or emplace operations. 
*/ + bool try_pop( value_type& value ) { + cpq_operation op_data(value, POP_OP); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // This operation affects the whole container => it is not thread-safe + void clear() { + data.clear(); + mark = 0; + my_size.store(0, std::memory_order_relaxed); + } + + // This operation affects the whole container => it is not thread-safe + void swap( concurrent_priority_queue& other ) { + if (this != &other) { + using std::swap; + swap(data, other.data); + swap(mark, other.mark); + + size_type sz = my_size.load(std::memory_order_relaxed); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(sz, std::memory_order_relaxed); + } + } + + allocator_type get_allocator() const { return data.get_allocator(); } +private: + enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP}; + enum operation_status {WAIT = 0, SUCCEEDED, FAILED}; + + class cpq_operation : public aggregated_operation<cpq_operation> { + public: + operation_type type; + union { + value_type* elem; + size_type sz; + }; + cpq_operation( const value_type& value, operation_type t ) + : type(t), elem(const_cast<value_type*>(&value)) {} + }; // class cpq_operation + + class functor { + concurrent_priority_queue* my_cpq; + public: + functor() : my_cpq(nullptr) {} + functor( concurrent_priority_queue* cpq ) : my_cpq(cpq) {} + + void operator()(cpq_operation* op_list) { + __TBB_ASSERT(my_cpq != nullptr, "Invalid functor"); + my_cpq->handle_operations(op_list); + } + }; // class functor + + void handle_operations( cpq_operation* op_list ) { + call_itt_notify(acquired, this); + cpq_operation* tmp, *pop_list = nullptr; + __TBB_ASSERT(mark == data.size(), NULL); + + // First pass processes all constant (amortized; reallocation may happen) time pushes and pops. + while(op_list) { + // ITT note: &(op_list->status) tag is used to cover accesses to op_list + // node. This thread is going to handle the operation, and so will acquire it + // and perform the associated operation w/o triggering a race condition; the + // thread that created the operation is waiting on the status field, so when + // this thread is done with the operation, it will perform a + // store_with_release to give control back to the waiting thread in + // aggregator::insert_operation. 
+ // TODO: enable + call_itt_notify(acquired, &(op_list->status)); + __TBB_ASSERT(op_list->type != INVALID_OP, NULL); + + tmp = op_list; + op_list = op_list->next.load(std::memory_order_relaxed); + if (tmp->type == POP_OP) { + if (mark < data.size() && + my_compare(data[0], data.back())) + { + // there are newly pushed elems and the last one is higher than top + *(tmp->elem) = std::move(data.back()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + + data.pop_back(); + __TBB_ASSERT(mark <= data.size(), NULL); + } else { // no convenient item to pop; postpone + tmp->next.store(pop_list, std::memory_order_relaxed); + pop_list = tmp; + } + } else { // PUSH_OP or PUSH_RVALUE_OP + __TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation"); +#if TBB_USE_EXCEPTIONS + try +#endif + { + if (tmp->type == PUSH_OP) { + push_back_helper(*(tmp->elem)); + } else { + data.push_back(std::move(*(tmp->elem))); + } + my_size.store(my_size.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + } +#if TBB_USE_EXCEPTIONS + catch(...) { + tmp->status.store(uintptr_t(FAILED), std::memory_order_release); + } +#endif + } + } + + // Second pass processes pop operations + while(pop_list) { + tmp = pop_list; + pop_list = pop_list->next.load(std::memory_order_relaxed); + __TBB_ASSERT(tmp->type == POP_OP, NULL); + if (data.empty()) { + tmp->status.store(uintptr_t(FAILED), std::memory_order_release); + } else { + __TBB_ASSERT(mark <= data.size(), NULL); + if (mark < data.size() && + my_compare(data[0], data.back())) + { + // there are newly pushed elems and the last one is higher than top + *(tmp->elem) = std::move(data.back()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + data.pop_back(); + } else { // extract top and push last element down heap + *(tmp->elem) = std::move(data[0]); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + reheap(); + } + } + } + + // heapify any leftover pushed elements before doing the next + // batch of operations + if (mark < data.size()) heapify(); + __TBB_ASSERT(mark == data.size(), NULL); + call_itt_notify(releasing, this); + } + + // Merge unsorted elements into heap + void heapify() { + if (!mark && data.size() > 0) mark = 1; + for (; mark < data.size(); ++mark) { + // for each unheapified element under size + size_type cur_pos = mark; + value_type to_place = std::move(data[mark]); + do { // push to_place up the heap + size_type parent = (cur_pos - 1) >> 1; + if (!my_compare(data[parent], to_place)) + break; + data[cur_pos] = std::move(data[parent]); + cur_pos = parent; + } while(cur_pos); + data[cur_pos] = std::move(to_place); + } + } + + // Re-heapify after an extraction + // Re-heapify by pushing last element down the heap from the root. 
+ void reheap() { + size_type cur_pos = 0, child = 1; + + while(child < mark) { + size_type target = child; + if (child + 1 < mark && my_compare(data[child], data[child + 1])) + ++target; + // target now has the higher priority child + if (my_compare(data[target], data.back())) + break; + data[cur_pos] = std::move(data[target]); + cur_pos = target; + child = (cur_pos << 1) + 1; + } + if (cur_pos != data.size() - 1) + data[cur_pos] = std::move(data.back()); + data.pop_back(); + if (mark > data.size()) mark = data.size(); + } + + void push_back_helper( const T& value ) { + push_back_helper_impl(value, std::is_copy_constructible<T>{}); + } + + void push_back_helper_impl( const T& value, /*is_copy_constructible = */std::true_type ) { + data.push_back(value); + } + + void push_back_helper_impl( const T&, /*is_copy_constructible = */std::false_type ) { + __TBB_ASSERT(false, "error: calling tbb::concurrent_priority_queue.push(const value_type&) for move-only type"); + } + + using aggregator_type = aggregator<functor, cpq_operation>; + + aggregator_type my_aggregator; + // Padding added to avoid false sharing + char padding1[max_nfs_size - sizeof(aggregator_type)]; + // The point at which unsorted elements begin + size_type mark; + std::atomic<size_type> my_size; + Compare my_compare; + + // Padding added to avoid false sharing + char padding2[max_nfs_size - (2*sizeof(size_type)) - sizeof(Compare)]; + //! Storage for the heap of elements in queue, plus unheapified elements + /** data has the following structure: + + binary unheapified + heap elements + ____|_______|____ + | | | + v v v + [_|...|_|_|...|_| |...| ] + 0 ^ ^ ^ + | | |__capacity + | |__my_size + |__mark + + Thus, data stores the binary heap starting at position 0 through + mark-1 (it may be empty). Then there are 0 or more elements + that have not yet been inserted into the heap, in positions + mark through my_size-1. 
*/ + + using vector_type = std::vector<value_type, allocator_type>; + vector_type data; + + friend bool operator==( const concurrent_priority_queue& lhs, + const concurrent_priority_queue& rhs ) + { + return lhs.data == rhs.data; + } + +#if !__TBB_CPP20_COMPARISONS_PRESENT + friend bool operator!=( const concurrent_priority_queue& lhs, + const concurrent_priority_queue& rhs ) + { + return !(lhs == rhs); + } +#endif +}; // class concurrent_priority_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_priority_queue( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_priority_queue<iterator_value_t<It>, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_priority_queue( It, It, Alloc ) +-> concurrent_priority_queue<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; + +template <typename T, + typename Comp = std::less<T>, + typename Alloc = tbb::cache_aligned_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_priority_queue( std::initializer_list<T>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_priority_queue<T, Comp, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_priority_queue( std::initializer_list<T>, Alloc ) +-> concurrent_priority_queue<T, std::less<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename T, typename Compare, typename Allocator> +void swap( concurrent_priority_queue<T, Compare, Allocator>& lhs, + concurrent_priority_queue<T, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail +inline namespace v1 { +using detail::d1::concurrent_priority_queue; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_priority_queue_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h index c8ae7afff7..7b4f2fb766 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h @@ -1,592 +1,592 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
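The concurrent_priority_queue defined above allows push(), emplace() and try_pop() to be called concurrently with one another, while size(), empty(), clear(), assignment and swap() are whole-container operations that must not race with modifiers. A minimal usage sketch, illustrative only and not part of the diffed header (tbb::parallel_for is used merely to generate concurrent pushes):

#include <oneapi/tbb/concurrent_priority_queue.h>
#include <oneapi/tbb/parallel_for.h>
#include <cstdio>

int main() {
    tbb::concurrent_priority_queue<int> cpq;   // std::less<int>: larger values pop first

    // push/emplace/try_pop are safe to call concurrently with one another.
    tbb::parallel_for(0, 1000, [&](int i) { cpq.push(i); });

    int top = 0;
    if (cpq.try_pop(top))                      // highest-priority element, here 999
        std::printf("top = %d\n", top);

    // size()/empty()/clear() are not synchronized with concurrent modifiers.
    std::printf("remaining = %zu\n", cpq.size());

    // CTAD via the guides above deduces concurrent_priority_queue<int>.
    tbb::concurrent_priority_queue from_list{3, 1, 2};
    from_list.push(4);
    return 0;
}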
-*/ - -#ifndef __TBB_concurrent_queue_H -#define __TBB_concurrent_queue_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_queue_base.h" -#include "detail/_allocator_traits.h" -#include "detail/_exception.h" -#include "detail/_containers_helpers.h" -#include "cache_aligned_allocator.h" - -namespace tbb { -namespace detail { -namespace d1 { - -// A high-performance thread-safe non-blocking concurrent queue. -// Multiple threads may each push and pop concurrently. -// Assignment construction is not allowed. -template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> -class concurrent_queue { - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; - using queue_representation_type = concurrent_queue_rep<T, Allocator>; - using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; - using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; -public: - using size_type = std::size_t; - using value_type = T; - using reference = T&; - using const_reference = const T&; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using iterator = concurrent_queue_iterator<concurrent_queue, T, Allocator>; - using const_iterator = concurrent_queue_iterator<concurrent_queue, const T, Allocator>; - - concurrent_queue() : concurrent_queue(allocator_type()) {} - - explicit concurrent_queue(const allocator_type& a) : - my_allocator(a), my_queue_representation(nullptr) - { - my_queue_representation = static_cast<queue_representation_type*>(r1::cache_aligned_allocate(sizeof(queue_representation_type))); - queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); - - __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); - } - - template <typename InputIterator> - concurrent_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : - concurrent_queue(a) - { - for (; begin != end; ++begin) - push(*begin); - } - - concurrent_queue(const concurrent_queue& src, const allocator_type& a) : - concurrent_queue(a) - { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); - } - - concurrent_queue(const concurrent_queue& src) : - concurrent_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) - { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); - } - - // Move constructors - concurrent_queue(concurrent_queue&& src) : - concurrent_queue(std::move(src.my_allocator)) - { - internal_swap(src); - } - - concurrent_queue(concurrent_queue&& src, const allocator_type& a) : - concurrent_queue(a) - { - // checking that memory allocated by one instance of allocator can be deallocated - // with another - if (my_allocator == src.my_allocator) { - internal_swap(src); - } else { - // allocators are different => performing per-element move - my_queue_representation->assign(*src.my_queue_representation, move_construct_item); - src.clear(); - } - } - - // Destroy 
queue - ~concurrent_queue() { - clear(); - my_queue_representation->clear(); - queue_allocator_traits::destroy(my_allocator, my_queue_representation); - r1::cache_aligned_deallocate(my_queue_representation); - } - - // Enqueue an item at tail of queue. - void push(const T& value) { - internal_push(value); - } - - void push(T&& value) { - internal_push(std::move(value)); - } - - template <typename... Args> - void emplace( Args&&... args ) { - internal_push(std::forward<Args>(args)...); - } - - // Attempt to dequeue an item from head of queue. - /** Does not wait for item to become available. - Returns true if successful; false otherwise. */ - bool try_pop( T& result ) { - return internal_try_pop(&result); - } - - // Return the number of items in the queue; thread unsafe - size_type unsafe_size() const { - std::ptrdiff_t size = my_queue_representation->size(); - return size < 0 ? 0 : size_type(size); - } - - // Equivalent to size()==0. - __TBB_nodiscard bool empty() const { - return my_queue_representation->empty(); - } - - // Clear the queue. not thread-safe. - void clear() { - while (!empty()) { - T value; - try_pop(value); - } - } - - // Return allocator object - allocator_type get_allocator() const { return my_allocator; } - - //------------------------------------------------------------------------ - // The iterators are intended only for debugging. They are slow and not thread safe. - //------------------------------------------------------------------------ - - iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } - iterator unsafe_end() { return iterator(); } - const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } - const_iterator unsafe_end() const { return const_iterator(); } - const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } - const_iterator unsafe_cend() const { return const_iterator(); } - -private: - void internal_swap(concurrent_queue& src) { - using std::swap; - swap(my_queue_representation, src.my_queue_representation); - } - - template <typename... Args> - void internal_push( Args&&... args ) { - ticket_type k = my_queue_representation->tail_counter++; - my_queue_representation->choose(k).push(k, *my_queue_representation, std::forward<Args>(args)...); - } - - bool internal_try_pop( void* dst ) { - ticket_type k; - do { - k = my_queue_representation->head_counter.load(std::memory_order_relaxed); - do { - if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - k) <= 0) { - // Queue is empty - return false; - } - - // Queue had item with ticket k when we looked. Attempt to get that item. - // Another thread snatched the item, retry. 
- } while (!my_queue_representation->head_counter.compare_exchange_strong(k, k + 1)); - } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation)); - return true; - } - - template <typename Container, typename Value, typename A> - friend class concurrent_queue_iterator; - - static void copy_construct_item(T* location, const void* src) { - // TODO: use allocator_traits for copy construction - new (location) value_type(*static_cast<const value_type*>(src)); - // queue_allocator_traits::construct(my_allocator, location, *static_cast<const T*>(src)); - } - - static void move_construct_item(T* location, const void* src) { - // TODO: use allocator_traits for move construction - new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); - } - - queue_allocator_type my_allocator; - queue_representation_type* my_queue_representation; -}; // class concurrent_queue - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -// Deduction guide for the constructor from two iterators -template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_queue( It, It, Alloc = Alloc() ) --> concurrent_queue<iterator_value_t<It>, Alloc>; - -#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ - -class concurrent_monitor; - -template <typename FuncType> -class delegated_function : public delegate_base { -public: - delegated_function(FuncType& f) : my_func(f) {} - - bool operator()() const override { - return my_func(); - } - -private: - FuncType &my_func; -}; // class delegated_function - -// The concurrent monitor tags for concurrent_bounded_queue. -static constexpr std::size_t cbq_slots_avail_tag = 0; -static constexpr std::size_t cbq_items_avail_tag = 1; -} // namespace d1 - - -namespace r1 { - class concurrent_monitor; - - std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ); - void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ); - void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ); - void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag - , std::size_t ticket ); - void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, - std::ptrdiff_t target, d1::delegate_base& predicate ); -} // namespace r1 - - -namespace d1 { -// A high-performance thread-safe blocking concurrent bounded queue. -// Supports boundedness and blocking semantics. -// Multiple threads may each push and pop concurrently. -// Assignment construction is not allowed. 
-template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> -class concurrent_bounded_queue { - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; - using queue_representation_type = concurrent_queue_rep<T, Allocator>; - using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; - using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; - - template <typename FuncType> - void internal_wait(r1::concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, FuncType pred) { - delegated_function<FuncType> func(pred); - r1::wait_bounded_queue_monitor(monitors, monitor_tag, target, func); - } -public: - using size_type = std::ptrdiff_t; - using value_type = T; - using reference = T&; - using const_reference = const T&; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using iterator = concurrent_queue_iterator<concurrent_bounded_queue, T, Allocator>; - using const_iterator = concurrent_queue_iterator<concurrent_bounded_queue, const T, Allocator> ; - - concurrent_bounded_queue() : concurrent_bounded_queue(allocator_type()) {} - - explicit concurrent_bounded_queue( const allocator_type& a ) : - my_allocator(a), my_capacity(0), my_abort_counter(0), my_queue_representation(nullptr) - { - my_queue_representation = reinterpret_cast<queue_representation_type*>( - r1::allocate_bounded_queue_rep(sizeof(queue_representation_type))); - my_monitors = reinterpret_cast<r1::concurrent_monitor*>(my_queue_representation + 1); - queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); - my_capacity = std::size_t(-1) / (queue_representation_type::item_size > 1 ? 
queue_representation_type::item_size : 2); - - __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); - } - - template <typename InputIterator> - concurrent_bounded_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type() ) : - concurrent_bounded_queue(a) - { - for (; begin != end; ++begin) - push(*begin); - } - - concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a ) : - concurrent_bounded_queue(a) - { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); - } - - concurrent_bounded_queue( const concurrent_bounded_queue& src ) : - concurrent_bounded_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) - { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); - } - - // Move constructors - concurrent_bounded_queue( concurrent_bounded_queue&& src ) : - concurrent_bounded_queue(std::move(src.my_allocator)) - { - internal_swap(src); - } - - concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a ) : - concurrent_bounded_queue(a) - { - // checking that memory allocated by one instance of allocator can be deallocated - // with another - if (my_allocator == src.my_allocator) { - internal_swap(src); - } else { - // allocators are different => performing per-element move - my_queue_representation->assign(*src.my_queue_representation, move_construct_item); - src.clear(); - } - } - - // Destroy queue - ~concurrent_bounded_queue() { - clear(); - my_queue_representation->clear(); - queue_allocator_traits::destroy(my_allocator, my_queue_representation); - r1::deallocate_bounded_queue_rep(reinterpret_cast<std::uint8_t*>(my_queue_representation), - sizeof(queue_representation_type)); - } - - // Enqueue an item at tail of queue. - void push( const T& value ) { - internal_push(value); - } - - void push( T&& value ) { - internal_push(std::move(value)); - } - - // Enqueue an item at tail of queue if queue is not already full. - // Does not wait for queue to become not full. - // Returns true if item is pushed; false if queue was already full. - bool try_push( const T& value ) { - return internal_push_if_not_full(value); - } - - bool try_push( T&& value ) { - return internal_push_if_not_full(std::move(value)); - } - - template <typename... Args> - void emplace( Args&&... args ) { - internal_push(std::forward<Args>(args)...); - } - - template <typename... Args> - bool try_emplace( Args&&... args ) { - return internal_push_if_not_full(std::forward<Args>(args)...); - } - - // Attempt to dequeue an item from head of queue. - /** Does not wait for item to become available. - Returns true if successful; false otherwise. */ - bool pop( T& result ) { - return internal_pop(&result); - } - - bool try_pop( T& result ) { - return internal_pop_if_present(&result); - } - - void abort() { - internal_abort(); - } - - // Return the number of items in the queue; thread unsafe - std::ptrdiff_t size() const { - return my_queue_representation->size(); - } - - void set_capacity( size_type new_capacity ) { - std::ptrdiff_t c = new_capacity < 0 ? 
infinite_capacity : new_capacity; - my_capacity = c; - } - - size_type capacity() const { - return my_capacity; - } - - // Equivalent to size()==0. - __TBB_nodiscard bool empty() const { - return my_queue_representation->empty(); - } - - // Clear the queue. not thread-safe. - void clear() { - while (!empty()) { - T value; - try_pop(value); - } - } - - // Return allocator object - allocator_type get_allocator() const { return my_allocator; } - - //------------------------------------------------------------------------ - // The iterators are intended only for debugging. They are slow and not thread safe. - //------------------------------------------------------------------------ - - iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } - iterator unsafe_end() { return iterator(); } - const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } - const_iterator unsafe_end() const { return const_iterator(); } - const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } - const_iterator unsafe_cend() const { return const_iterator(); } - -private: - void internal_swap( concurrent_bounded_queue& src ) { - std::swap(my_queue_representation, src.my_queue_representation); - std::swap(my_monitors, src.my_monitors); - } - - static constexpr std::ptrdiff_t infinite_capacity = std::ptrdiff_t(~size_type(0) / 2); - - template <typename... Args> - void internal_push( Args&&... args ) { - unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); - ticket_type ticket = my_queue_representation->tail_counter++; - std::ptrdiff_t target = ticket - my_capacity; - - if (static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target) { // queue is full - auto pred = [&] { - if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { - throw_exception(exception_id::user_abort); - } - - return static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target; - }; - - try_call( [&] { - internal_wait(my_monitors, cbq_slots_avail_tag, target, pred); - }).on_exception( [&] { - my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation); - }); - - } - __TBB_ASSERT((static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) > target), nullptr); - my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); - r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); - } - - template <typename... Args> - bool internal_push_if_not_full( Args&&... args ) { - ticket_type ticket = my_queue_representation->tail_counter.load(std::memory_order_relaxed); - do { - if (static_cast<std::ptrdiff_t>(ticket - my_queue_representation->head_counter.load(std::memory_order_relaxed)) >= my_capacity) { - // Queue is full - return false; - } - // Queue had empty slot with ticket k when we looked. Attempt to claim that slot. - // Another thread claimed the slot, so retry. 
- } while (!my_queue_representation->tail_counter.compare_exchange_strong(ticket, ticket + 1)); - - my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); - r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); - return true; - } - - bool internal_pop( void* dst ) { - std::ptrdiff_t target; - // This loop is a single pop operation; abort_counter should not be re-read inside - unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); - - do { - target = my_queue_representation->head_counter++; - if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target) { - auto pred = [&] { - if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { - throw_exception(exception_id::user_abort); - } - - return static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target; - }; - - try_call( [&] { - internal_wait(my_monitors, cbq_items_avail_tag, target, pred); - }).on_exception( [&] { - my_queue_representation->head_counter--; - }); - } - __TBB_ASSERT(static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) > target, nullptr); - } while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation)); - - r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, target); - return true; - } - - bool internal_pop_if_present( void* dst ) { - ticket_type ticket; - do { - ticket = my_queue_representation->head_counter.load(std::memory_order_relaxed); - do { - if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty - // Queue is empty - return false; - } - // Queue had item with ticket k when we looked. Attempt to get that item. - // Another thread snatched the item, retry. 
- } while (!my_queue_representation->head_counter.compare_exchange_strong(ticket, ticket + 1)); - } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation)); - - r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); - return true; - } - - void internal_abort() { - ++my_abort_counter; - r1::abort_bounded_queue_monitors(my_monitors); - } - - static void copy_construct_item(T* location, const void* src) { - // TODO: use allocator_traits for copy construction - new (location) value_type(*static_cast<const value_type*>(src)); - } - - static void move_construct_item(T* location, const void* src) { - // TODO: use allocator_traits for move construction - new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); - } - - template <typename Container, typename Value, typename A> - friend class concurrent_queue_iterator; - - queue_allocator_type my_allocator; - std::ptrdiff_t my_capacity; - std::atomic<unsigned> my_abort_counter; - queue_representation_type* my_queue_representation; - - r1::concurrent_monitor* my_monitors; -}; // class concurrent_bounded_queue - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -// Deduction guide for the constructor from two iterators -template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>> -concurrent_bounded_queue( It, It, Alloc = Alloc() ) --> concurrent_bounded_queue<iterator_value_t<It>, Alloc>; - -#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ - -} //namespace d1 -} // namesapce detail - -inline namespace v1 { - -using detail::d1::concurrent_queue; -using detail::d1::concurrent_bounded_queue; -using detail::r1::user_abort; -using detail::r1::bad_last_alloc; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_queue_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_queue_H +#define __TBB_concurrent_queue_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_queue_base.h" +#include "detail/_allocator_traits.h" +#include "detail/_exception.h" +#include "detail/_containers_helpers.h" +#include "cache_aligned_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { + +// A high-performance thread-safe non-blocking concurrent queue. +// Multiple threads may each push and pop concurrently. +// Assignment construction is not allowed. 
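As the comment above notes, concurrent_queue is unbounded and non-blocking: push() always enqueues (throwing only on allocation failure) and try_pop() returns false immediately when no item is available. A short producer/consumer sketch, illustrative only and not part of the diffed header (std::thread stands in for any concurrent callers):

#include <oneapi/tbb/concurrent_queue.h>
#include <thread>
#include <cstdio>

int main() {
    tbb::concurrent_queue<int> q;

    std::thread producer([&] {
        for (int i = 0; i < 100; ++i)
            q.push(i);                 // never blocks; the queue grows as needed
    });
    std::thread consumer([&] {
        int value = 0;
        int popped = 0;
        while (popped < 100)
            if (q.try_pop(value))      // false whenever the queue is momentarily empty
                ++popped;
    });
    producer.join();
    consumer.join();

    // unsafe_size() and the unsafe_* iterators are for debugging only.
    std::printf("size after drain = %zu\n", q.unsafe_size());
    return 0;
}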
+template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_queue { + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + using queue_representation_type = concurrent_queue_rep<T, Allocator>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; + using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; +public: + using size_type = std::size_t; + using value_type = T; + using reference = T&; + using const_reference = const T&; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = concurrent_queue_iterator<concurrent_queue, T, Allocator>; + using const_iterator = concurrent_queue_iterator<concurrent_queue, const T, Allocator>; + + concurrent_queue() : concurrent_queue(allocator_type()) {} + + explicit concurrent_queue(const allocator_type& a) : + my_allocator(a), my_queue_representation(nullptr) + { + my_queue_representation = static_cast<queue_representation_type*>(r1::cache_aligned_allocate(sizeof(queue_representation_type))); + queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); + + __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); + } + + template <typename InputIterator> + concurrent_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : + concurrent_queue(a) + { + for (; begin != end; ++begin) + push(*begin); + } + + concurrent_queue(const concurrent_queue& src, const allocator_type& a) : + concurrent_queue(a) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + concurrent_queue(const concurrent_queue& src) : + concurrent_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + // Move constructors + concurrent_queue(concurrent_queue&& src) : + concurrent_queue(std::move(src.my_allocator)) + { + internal_swap(src); + } + + concurrent_queue(concurrent_queue&& src, const allocator_type& a) : + concurrent_queue(a) + { + // checking that memory allocated by one instance of allocator can be deallocated + // with another + if (my_allocator == src.my_allocator) { + internal_swap(src); + } else { + // allocators are different => performing per-element move + my_queue_representation->assign(*src.my_queue_representation, move_construct_item); + src.clear(); + } + } + + // Destroy queue + ~concurrent_queue() { + clear(); + my_queue_representation->clear(); + queue_allocator_traits::destroy(my_allocator, my_queue_representation); + r1::cache_aligned_deallocate(my_queue_representation); + } + + // Enqueue an item at tail of queue. + void push(const T& value) { + internal_push(value); + } + + void push(T&& value) { + internal_push(std::move(value)); + } + + template <typename... Args> + void emplace( Args&&... 
args ) { + internal_push(std::forward<Args>(args)...); + } + + // Attempt to dequeue an item from head of queue. + /** Does not wait for item to become available. + Returns true if successful; false otherwise. */ + bool try_pop( T& result ) { + return internal_try_pop(&result); + } + + // Return the number of items in the queue; thread unsafe + size_type unsafe_size() const { + std::ptrdiff_t size = my_queue_representation->size(); + return size < 0 ? 0 : size_type(size); + } + + // Equivalent to size()==0. + __TBB_nodiscard bool empty() const { + return my_queue_representation->empty(); + } + + // Clear the queue. not thread-safe. + void clear() { + while (!empty()) { + T value; + try_pop(value); + } + } + + // Return allocator object + allocator_type get_allocator() const { return my_allocator; } + + //------------------------------------------------------------------------ + // The iterators are intended only for debugging. They are slow and not thread safe. + //------------------------------------------------------------------------ + + iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } + iterator unsafe_end() { return iterator(); } + const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_end() const { return const_iterator(); } + const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_cend() const { return const_iterator(); } + +private: + void internal_swap(concurrent_queue& src) { + using std::swap; + swap(my_queue_representation, src.my_queue_representation); + } + + template <typename... Args> + void internal_push( Args&&... args ) { + ticket_type k = my_queue_representation->tail_counter++; + my_queue_representation->choose(k).push(k, *my_queue_representation, std::forward<Args>(args)...); + } + + bool internal_try_pop( void* dst ) { + ticket_type k; + do { + k = my_queue_representation->head_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - k) <= 0) { + // Queue is empty + return false; + } + + // Queue had item with ticket k when we looked. Attempt to get that item. + // Another thread snatched the item, retry. 
+ } while (!my_queue_representation->head_counter.compare_exchange_strong(k, k + 1)); + } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation)); + return true; + } + + template <typename Container, typename Value, typename A> + friend class concurrent_queue_iterator; + + static void copy_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for copy construction + new (location) value_type(*static_cast<const value_type*>(src)); + // queue_allocator_traits::construct(my_allocator, location, *static_cast<const T*>(src)); + } + + static void move_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for move construction + new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); + } + + queue_allocator_type my_allocator; + queue_representation_type* my_queue_representation; +}; // class concurrent_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_queue( It, It, Alloc = Alloc() ) +-> concurrent_queue<iterator_value_t<It>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +class concurrent_monitor; + +template <typename FuncType> +class delegated_function : public delegate_base { +public: + delegated_function(FuncType& f) : my_func(f) {} + + bool operator()() const override { + return my_func(); + } + +private: + FuncType &my_func; +}; // class delegated_function + +// The concurrent monitor tags for concurrent_bounded_queue. +static constexpr std::size_t cbq_slots_avail_tag = 0; +static constexpr std::size_t cbq_items_avail_tag = 1; +} // namespace d1 + + +namespace r1 { + class concurrent_monitor; + + std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ); + void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ); + void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ); + void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag + , std::size_t ticket ); + void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, + std::ptrdiff_t target, d1::delegate_base& predicate ); +} // namespace r1 + + +namespace d1 { +// A high-performance thread-safe blocking concurrent bounded queue. +// Supports boundedness and blocking semantics. +// Multiple threads may each push and pop concurrently. +// Assignment construction is not allowed. 
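A corresponding sketch for the bounded variant defined just below (the capacity, thread structure, and -1 sentinel are illustrative assumptions): set_capacity() bounds the queue, push() blocks while the queue is full, pop() blocks until an item arrives, try_push()/try_pop() are the non-blocking counterparts, and abort() makes threads blocked in push() or pop() throw user_abort.

    // Minimal usage sketch; capacity, thread structure, and the -1 sentinel are illustrative.
    #include <oneapi/tbb/concurrent_queue.h>
    #include <cstdio>
    #include <thread>

    int main() {
        tbb::concurrent_bounded_queue<int> queue;
        queue.set_capacity(4);              // push() now blocks while 4 items are waiting

        std::thread producer([&queue] {
            for (int i = 0; i < 16; ++i)
                queue.push(i);              // blocks whenever the queue is full
            queue.push(-1);                 // sentinel telling the consumer to stop
        });

        std::thread consumer([&queue] {
            int value = 0;
            // pop() blocks until an item is available and returns true on success.
            while (queue.pop(value) && value != -1)
                std::printf("got %d\n", value);
        });

        producer.join();
        consumer.join();
        return 0;
    }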
+template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_bounded_queue { + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + using queue_representation_type = concurrent_queue_rep<T, Allocator>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; + using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; + + template <typename FuncType> + void internal_wait(r1::concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, FuncType pred) { + delegated_function<FuncType> func(pred); + r1::wait_bounded_queue_monitor(monitors, monitor_tag, target, func); + } +public: + using size_type = std::ptrdiff_t; + using value_type = T; + using reference = T&; + using const_reference = const T&; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = concurrent_queue_iterator<concurrent_bounded_queue, T, Allocator>; + using const_iterator = concurrent_queue_iterator<concurrent_bounded_queue, const T, Allocator> ; + + concurrent_bounded_queue() : concurrent_bounded_queue(allocator_type()) {} + + explicit concurrent_bounded_queue( const allocator_type& a ) : + my_allocator(a), my_capacity(0), my_abort_counter(0), my_queue_representation(nullptr) + { + my_queue_representation = reinterpret_cast<queue_representation_type*>( + r1::allocate_bounded_queue_rep(sizeof(queue_representation_type))); + my_monitors = reinterpret_cast<r1::concurrent_monitor*>(my_queue_representation + 1); + queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); + my_capacity = std::size_t(-1) / (queue_representation_type::item_size > 1 ? 
queue_representation_type::item_size : 2); + + __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); + } + + template <typename InputIterator> + concurrent_bounded_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type() ) : + concurrent_bounded_queue(a) + { + for (; begin != end; ++begin) + push(*begin); + } + + concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a ) : + concurrent_bounded_queue(a) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + concurrent_bounded_queue( const concurrent_bounded_queue& src ) : + concurrent_bounded_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + // Move constructors + concurrent_bounded_queue( concurrent_bounded_queue&& src ) : + concurrent_bounded_queue(std::move(src.my_allocator)) + { + internal_swap(src); + } + + concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a ) : + concurrent_bounded_queue(a) + { + // checking that memory allocated by one instance of allocator can be deallocated + // with another + if (my_allocator == src.my_allocator) { + internal_swap(src); + } else { + // allocators are different => performing per-element move + my_queue_representation->assign(*src.my_queue_representation, move_construct_item); + src.clear(); + } + } + + // Destroy queue + ~concurrent_bounded_queue() { + clear(); + my_queue_representation->clear(); + queue_allocator_traits::destroy(my_allocator, my_queue_representation); + r1::deallocate_bounded_queue_rep(reinterpret_cast<std::uint8_t*>(my_queue_representation), + sizeof(queue_representation_type)); + } + + // Enqueue an item at tail of queue. + void push( const T& value ) { + internal_push(value); + } + + void push( T&& value ) { + internal_push(std::move(value)); + } + + // Enqueue an item at tail of queue if queue is not already full. + // Does not wait for queue to become not full. + // Returns true if item is pushed; false if queue was already full. + bool try_push( const T& value ) { + return internal_push_if_not_full(value); + } + + bool try_push( T&& value ) { + return internal_push_if_not_full(std::move(value)); + } + + template <typename... Args> + void emplace( Args&&... args ) { + internal_push(std::forward<Args>(args)...); + } + + template <typename... Args> + bool try_emplace( Args&&... args ) { + return internal_push_if_not_full(std::forward<Args>(args)...); + } + + // Attempt to dequeue an item from head of queue. + /** Does not wait for item to become available. + Returns true if successful; false otherwise. */ + bool pop( T& result ) { + return internal_pop(&result); + } + + bool try_pop( T& result ) { + return internal_pop_if_present(&result); + } + + void abort() { + internal_abort(); + } + + // Return the number of items in the queue; thread unsafe + std::ptrdiff_t size() const { + return my_queue_representation->size(); + } + + void set_capacity( size_type new_capacity ) { + std::ptrdiff_t c = new_capacity < 0 ? 
infinite_capacity : new_capacity; + my_capacity = c; + } + + size_type capacity() const { + return my_capacity; + } + + // Equivalent to size()==0. + __TBB_nodiscard bool empty() const { + return my_queue_representation->empty(); + } + + // Clear the queue. not thread-safe. + void clear() { + while (!empty()) { + T value; + try_pop(value); + } + } + + // Return allocator object + allocator_type get_allocator() const { return my_allocator; } + + //------------------------------------------------------------------------ + // The iterators are intended only for debugging. They are slow and not thread safe. + //------------------------------------------------------------------------ + + iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } + iterator unsafe_end() { return iterator(); } + const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_end() const { return const_iterator(); } + const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_cend() const { return const_iterator(); } + +private: + void internal_swap( concurrent_bounded_queue& src ) { + std::swap(my_queue_representation, src.my_queue_representation); + std::swap(my_monitors, src.my_monitors); + } + + static constexpr std::ptrdiff_t infinite_capacity = std::ptrdiff_t(~size_type(0) / 2); + + template <typename... Args> + void internal_push( Args&&... args ) { + unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); + ticket_type ticket = my_queue_representation->tail_counter++; + std::ptrdiff_t target = ticket - my_capacity; + + if (static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target) { // queue is full + auto pred = [&] { + if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { + throw_exception(exception_id::user_abort); + } + + return static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target; + }; + + try_call( [&] { + internal_wait(my_monitors, cbq_slots_avail_tag, target, pred); + }).on_exception( [&] { + my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation); + }); + + } + __TBB_ASSERT((static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) > target), nullptr); + my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); + r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); + } + + template <typename... Args> + bool internal_push_if_not_full( Args&&... args ) { + ticket_type ticket = my_queue_representation->tail_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(ticket - my_queue_representation->head_counter.load(std::memory_order_relaxed)) >= my_capacity) { + // Queue is full + return false; + } + // Queue had empty slot with ticket k when we looked. Attempt to claim that slot. + // Another thread claimed the slot, so retry. 
+ } while (!my_queue_representation->tail_counter.compare_exchange_strong(ticket, ticket + 1)); + + my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); + r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); + return true; + } + + bool internal_pop( void* dst ) { + std::ptrdiff_t target; + // This loop is a single pop operation; abort_counter should not be re-read inside + unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); + + do { + target = my_queue_representation->head_counter++; + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target) { + auto pred = [&] { + if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { + throw_exception(exception_id::user_abort); + } + + return static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target; + }; + + try_call( [&] { + internal_wait(my_monitors, cbq_items_avail_tag, target, pred); + }).on_exception( [&] { + my_queue_representation->head_counter--; + }); + } + __TBB_ASSERT(static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) > target, nullptr); + } while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation)); + + r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, target); + return true; + } + + bool internal_pop_if_present( void* dst ) { + ticket_type ticket; + do { + ticket = my_queue_representation->head_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty + // Queue is empty + return false; + } + // Queue had item with ticket k when we looked. Attempt to get that item. + // Another thread snatched the item, retry. 
+ } while (!my_queue_representation->head_counter.compare_exchange_strong(ticket, ticket + 1)); + } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation)); + + r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); + return true; + } + + void internal_abort() { + ++my_abort_counter; + r1::abort_bounded_queue_monitors(my_monitors); + } + + static void copy_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for copy construction + new (location) value_type(*static_cast<const value_type*>(src)); + } + + static void move_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for move construction + new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); + } + + template <typename Container, typename Value, typename A> + friend class concurrent_queue_iterator; + + queue_allocator_type my_allocator; + std::ptrdiff_t my_capacity; + std::atomic<unsigned> my_abort_counter; + queue_representation_type* my_queue_representation; + + r1::concurrent_monitor* my_monitors; +}; // class concurrent_bounded_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>> +concurrent_bounded_queue( It, It, Alloc = Alloc() ) +-> concurrent_bounded_queue<iterator_value_t<It>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +} //namespace d1 +} // namesapce detail + +inline namespace v1 { + +using detail::d1::concurrent_queue; +using detail::d1::concurrent_bounded_queue; +using detail::r1::user_abort; +using detail::r1::bad_last_alloc; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_queue_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h index c68fa6c362..6baee7f1e8 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h @@ -1,259 +1,259 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_concurrent_set_H -#define __TBB_concurrent_set_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_skip_list.h" -#include "tbb_allocator.h" -#include <functional> -#include <utility> - -namespace tbb { -namespace detail { -namespace d1 { - -template<typename Key, typename KeyCompare, typename RandomGenerator, typename Allocator, bool AllowMultimapping> -struct set_traits { - static constexpr std::size_t max_level = RandomGenerator::max_level; - using random_level_generator_type = RandomGenerator; - using key_type = Key; - using value_type = key_type; - using compare_type = KeyCompare; - using value_compare = compare_type; - using reference = value_type&; - using const_reference = const value_type&; - using allocator_type = Allocator; - - static constexpr bool allow_multimapping = AllowMultimapping; - - static const key_type& get_key(const_reference val) { - return val; - } - - static value_compare value_comp(compare_type comp) { return comp; } -}; // struct set_traits - -template <typename Key, typename Compare, typename Allocator> -class concurrent_multiset; - -template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> -class concurrent_set : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { - using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; -public: - using key_type = Key; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using key_compare = Compare; - using value_compare = typename base_type::value_compare; - using allocator_type = Allocator; - - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - - using node_type = typename base_type::node_type; - - // Include constructors of base_type - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_set() = default; - concurrent_set( const concurrent_set& ) = default; - concurrent_set( const concurrent_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_set( concurrent_set&& ) = default; - concurrent_set( concurrent_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_set& operator=( const concurrent_set& ) = default; - concurrent_set& operator=( concurrent_set&& ) = default; - - template<typename OtherCompare> - void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_set<key_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } - - template<typename OtherCompare> - void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_set - -#if 
__TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Comp = std::less<iterator_value_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_set( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_set<iterator_value_t<It>, Comp, Alloc>; - -template <typename Key, - typename Comp = std::less<Key>, - typename Alloc = tbb::tbb_allocator<Key>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_set( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_set<Key, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_set( It, It, Alloc ) --> concurrent_set<iterator_value_t<It>, - std::less<iterator_value_t<It>>, Alloc>; - -template <typename Key, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_set( std::initializer_list<Key>, Alloc ) --> concurrent_set<Key, std::less<Key>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Compare, typename Allocator> -void swap( concurrent_set<Key, Compare, Allocator>& lhs, - concurrent_set<Key, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> -class concurrent_multiset : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { - using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; -public: - using key_type = Key; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using key_compare = Compare; - using value_compare = typename base_type::value_compare; - using allocator_type = Allocator; - - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - - using node_type = typename base_type::node_type; - - // Include constructors of base_type; - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_multiset() = default; - concurrent_multiset( const concurrent_multiset& ) = default; - concurrent_multiset( const concurrent_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_multiset( concurrent_multiset&& ) = default; - concurrent_multiset( concurrent_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_multiset& operator=( const concurrent_multiset& ) = default; - concurrent_multiset& operator=( concurrent_multiset&& ) = default; - - template<typename OtherCompare> - void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_set<key_type, OtherCompare, 
Allocator>&& source) { - this->internal_merge(std::move(source)); - } - - template<typename OtherCompare> - void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_multiset - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Comp = std::less<iterator_value_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_multiset( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_multiset<iterator_value_t<It>, Comp, Alloc>; - -template <typename Key, - typename Comp = std::less<Key>, - typename Alloc = tbb::tbb_allocator<Key>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_multiset( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_multiset<Key, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_multiset( It, It, Alloc ) --> concurrent_multiset<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; - -template <typename Key, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_multiset( std::initializer_list<Key>, Alloc ) --> concurrent_multiset<Key, std::less<Key>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Compare, typename Allocator> -void swap( concurrent_multiset<Key, Compare, Allocator>& lhs, - concurrent_multiset<Key, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_set; -using detail::d1::concurrent_multiset; -using detail::split; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_set_H +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_set_H +#define __TBB_concurrent_set_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_skip_list.h" +#include "tbb_allocator.h" +#include <functional> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +template<typename Key, typename KeyCompare, typename RandomGenerator, typename Allocator, bool AllowMultimapping> +struct set_traits { + static constexpr std::size_t max_level = RandomGenerator::max_level; + using random_level_generator_type = RandomGenerator; + using key_type = Key; + using value_type = key_type; + using compare_type = KeyCompare; + using value_compare = compare_type; + using reference = value_type&; + using const_reference = const value_type&; + using allocator_type = Allocator; + + static constexpr bool allow_multimapping = AllowMultimapping; + + static const key_type& get_key(const_reference val) { + return val; + } + + static value_compare value_comp(compare_type comp) { return comp; } +}; // struct set_traits + +template <typename Key, typename Compare, typename Allocator> +class concurrent_multiset; + +template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_set : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { + using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; +public: + using key_type = Key; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_set() = default; + concurrent_set( const concurrent_set& ) = default; + concurrent_set( const concurrent_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_set( concurrent_set&& ) = default; + concurrent_set( concurrent_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_set& operator=( const concurrent_set& ) = default; + concurrent_set& operator=( concurrent_set&& ) = default; + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_set + +#if 
__TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_set( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_set<iterator_value_t<It>, Comp, Alloc>; + +template <typename Key, + typename Comp = std::less<Key>, + typename Alloc = tbb::tbb_allocator<Key>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_set( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_set<Key, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_set( It, It, Alloc ) +-> concurrent_set<iterator_value_t<It>, + std::less<iterator_value_t<It>>, Alloc>; + +template <typename Key, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_set( std::initializer_list<Key>, Alloc ) +-> concurrent_set<Key, std::less<Key>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Compare, typename Allocator> +void swap( concurrent_set<Key, Compare, Allocator>& lhs, + concurrent_set<Key, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_multiset : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { + using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; +public: + using key_type = Key; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_multiset() = default; + concurrent_multiset( const concurrent_multiset& ) = default; + concurrent_multiset( const concurrent_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_multiset( concurrent_multiset&& ) = default; + concurrent_multiset( concurrent_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_multiset& operator=( const concurrent_multiset& ) = default; + concurrent_multiset& operator=( concurrent_multiset&& ) = default; + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, 
Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_multiset + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multiset( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multiset<iterator_value_t<It>, Comp, Alloc>; + +template <typename Key, + typename Comp = std::less<Key>, + typename Alloc = tbb::tbb_allocator<Key>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multiset( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multiset<Key, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multiset( It, It, Alloc ) +-> concurrent_multiset<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; + +template <typename Key, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multiset( std::initializer_list<Key>, Alloc ) +-> concurrent_multiset<Key, std::less<Key>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Compare, typename Allocator> +void swap( concurrent_multiset<Key, Compare, Allocator>& lhs, + concurrent_multiset<Key, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_set; +using detail::d1::concurrent_multiset; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_set_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h index 0c9c2cd79c..6e582f25e6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h @@ -1,387 +1,387 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_concurrent_unordered_map_H -#define __TBB_concurrent_unordered_map_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_unordered_base.h" -#include "tbb_allocator.h" -#include <functional> - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> -struct concurrent_unordered_map_traits { - using value_type = std::pair<const Key, T>; - using key_type = Key; - using allocator_type = Allocator; - using hash_compare_type = hash_compare<Key, Hash, KeyEqual>; - static constexpr bool allow_multimapping = AllowMultimapping; - - static constexpr const key_type& get_key( const value_type& value ) { - return value.first; - } -}; // struct concurrent_unordered_map_traits - -template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> -class concurrent_unordered_multimap; - -template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, - typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > -class concurrent_unordered_map - : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>> -{ - using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>; - using base_type = concurrent_unordered_base<traits_type>; -public: - using key_type = typename base_type::key_type; - using mapped_type = T; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using hasher = typename base_type::hasher; - using key_equal = typename base_type::key_equal; - using allocator_type = typename base_type::allocator_type; - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - using local_iterator = typename base_type::local_iterator; - using const_local_iterator = typename base_type::const_local_iterator; - using node_type = typename base_type::node_type; - - // Include constructors of base type - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_unordered_map() = default; - concurrent_unordered_map( const concurrent_unordered_map& ) = default; - concurrent_unordered_map( const concurrent_unordered_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_unordered_map( concurrent_unordered_map&& ) = default; - concurrent_unordered_map( concurrent_unordered_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_unordered_map& operator=( const concurrent_unordered_map& ) = default; - concurrent_unordered_map& operator=( concurrent_unordered_map&& ) = default; - - // Observers - mapped_type& operator[]( const key_type& key ) { - iterator where = this->find(key); - - if (where == this->end()) { - where = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; - } - return where->second; - } - - mapped_type& operator[]( key_type&& key ) { - iterator where = this->find(key); - - if (where == this->end()) { - where = 
this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; - } - return where->second; - } - - mapped_type& at( const key_type& key ) { - iterator where = this->find(key); - - if (where == this->end()) { - throw_exception(exception_id::invalid_key); - } - return where->second; - } - - const mapped_type& at( const key_type& key ) const { - const_iterator where = this->find(key); - - if (where == this->end()) { - throw_exception(exception_id::out_of_range); - } - return where->second; - } - - using base_type::insert; - - template<typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - std::pair<iterator, bool>>::type insert( P&& value ) { - return this->emplace(std::forward<P>(value)); - } - - template<typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - iterator>::type insert( const_iterator hint, P&& value ) { - return this->emplace_hint(hint, std::forward<P>(value)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_unordered_map - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename It, - typename Hash = std::hash<iterator_key_t<It>>, - typename KeyEq = std::equal_to<iterator_key_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_map( It, It, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; - -template <typename Key, typename T, - typename Hash = std::hash<std::remove_const_t<Key>>, - typename KeyEq = std::equal_to<std::remove_const_t<Key>>, - typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, KeyEq, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_map( It, It, std::size_t, Alloc ) --> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, 
- std::hash<iterator_key_t<It>>, - std::equal_to<iterator_key_t<It>>, Alloc>; - -// TODO: investigate if a deduction guide for concurrent_unordered_map(It, It, Alloc) is needed - -template <typename It, typename Hash, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_map( It, It, std::size_t, Hash, Alloc ) --> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, - Hash, std::equal_to<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) --> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -template <typename Key, typename T, typename Hash, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) --> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> -void swap( concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& lhs, - concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& rhs ) { - lhs.swap(rhs); -} - -template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, - typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > -class concurrent_unordered_multimap - : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>> -{ - using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>; - using base_type = concurrent_unordered_base<traits_type>; -public: - using key_type = typename base_type::key_type; - using mapped_type = T; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using hasher = typename base_type::hasher; - using key_equal = typename base_type::key_equal; - using allocator_type = typename base_type::allocator_type; - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - using local_iterator = typename base_type::local_iterator; - using const_local_iterator = typename base_type::const_local_iterator; - using node_type = typename base_type::node_type; - - // Include constructors of base type - using 
base_type::base_type; - using base_type::operator=; - using base_type::insert; - - // Required for implicit deduction guides - concurrent_unordered_multimap() = default; - concurrent_unordered_multimap( const concurrent_unordered_multimap& ) = default; - concurrent_unordered_multimap( const concurrent_unordered_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_unordered_multimap( concurrent_unordered_multimap&& ) = default; - concurrent_unordered_multimap( concurrent_unordered_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_unordered_multimap& operator=( const concurrent_unordered_multimap& ) = default; - concurrent_unordered_multimap& operator=( concurrent_unordered_multimap&& ) = default; - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - std::pair<iterator, bool>>::type insert( P&& value ) { - return this->emplace(std::forward<P>(value)); - } - - template<typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - iterator>::type insert( const_iterator hint, P&& value ) { - return this->emplace_hint(hint, std::forward<P&&>(value)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_unordered_multimap - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Hash = std::hash<iterator_key_t<It>>, - typename KeyEq = std::equal_to<iterator_key_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multimap( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; - -template <typename Key, typename T, - typename Hash = std::hash<std::remove_const_t<Key>>, - typename KeyEq = std::equal_to<std::remove_const_t<Key>>, - typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, 
KeyEq, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multimap( It, It, std::size_t, Alloc ) --> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, - std::hash<iterator_key_t<It>>, - std::equal_to<iterator_key_t<It>>, Alloc>; - -template <typename It, typename Hash, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multimap( It, It, std::size_t, Hash, Alloc ) --> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, - std::equal_to<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) --> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -template <typename Key, typename T, typename Hash, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) --> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> -void swap( concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& lhs, - concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& rhs ) { - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_unordered_map; -using detail::d1::concurrent_unordered_multimap; -using detail::split; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_unordered_map_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_unordered_map_H +#define __TBB_concurrent_unordered_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_unordered_base.h" +#include "tbb_allocator.h" +#include <functional> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> +struct concurrent_unordered_map_traits { + using value_type = std::pair<const Key, T>; + using key_type = Key; + using allocator_type = Allocator; + using hash_compare_type = hash_compare<Key, Hash, KeyEqual>; + static constexpr bool allow_multimapping = AllowMultimapping; + + static constexpr const key_type& get_key( const value_type& value ) { + return value.first; + } +}; // struct concurrent_unordered_map_traits + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +class concurrent_unordered_multimap; + +template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > +class concurrent_unordered_map + : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>> +{ + using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using mapped_type = T; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_unordered_map() = default; + concurrent_unordered_map( const concurrent_unordered_map& ) = default; + concurrent_unordered_map( const concurrent_unordered_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_map( concurrent_unordered_map&& ) = default; + concurrent_unordered_map( concurrent_unordered_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_map& operator=( const concurrent_unordered_map& ) = default; + concurrent_unordered_map& operator=( concurrent_unordered_map&& ) = default; + + // Observers + mapped_type& operator[]( const key_type& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + where = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; + } + return where->second; + } + + mapped_type& operator[]( key_type&& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + where = 
this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; + } + return where->second; + } + + mapped_type& at( const key_type& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + throw_exception(exception_id::invalid_key); + } + return where->second; + } + + const mapped_type& at( const key_type& key ) const { + const_iterator where = this->find(key); + + if (where == this->end()) { + throw_exception(exception_id::out_of_range); + } + return where->second; + } + + using base_type::insert; + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) { + return this->emplace(std::forward<P>(value)); + } + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_map + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Hash = std::hash<iterator_key_t<It>>, + typename KeyEq = std::equal_to<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( It, It, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; + +template <typename Key, typename T, + typename Hash = std::hash<std::remove_const_t<Key>>, + typename KeyEq = std::equal_to<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( It, It, std::size_t, Alloc ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, 
+ std::hash<iterator_key_t<It>>, + std::equal_to<iterator_key_t<It>>, Alloc>; + +// TODO: investigate if a deduction guide for concurrent_unordered_map(It, It, Alloc) is needed + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, + Hash, std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > +class concurrent_unordered_multimap + : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>> +{ + using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using mapped_type = T; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base type + using 
base_type::base_type; + using base_type::operator=; + using base_type::insert; + + // Required for implicit deduction guides + concurrent_unordered_multimap() = default; + concurrent_unordered_multimap( const concurrent_unordered_multimap& ) = default; + concurrent_unordered_multimap( const concurrent_unordered_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_multimap( concurrent_unordered_multimap&& ) = default; + concurrent_unordered_multimap( concurrent_unordered_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_multimap& operator=( const concurrent_unordered_multimap& ) = default; + concurrent_unordered_multimap& operator=( concurrent_unordered_multimap&& ) = default; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) { + return this->emplace(std::forward<P>(value)); + } + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) { + return this->emplace_hint(hint, std::forward<P&&>(value)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_multimap + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Hash = std::hash<iterator_key_t<It>>, + typename KeyEq = std::equal_to<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; + +template <typename Key, typename T, + typename Hash = std::hash<std::remove_const_t<Key>>, + typename KeyEq = std::equal_to<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, 
KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( It, It, std::size_t, Alloc ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, + std::hash<iterator_key_t<It>>, + std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, + std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_unordered_map; +using detail::d1::concurrent_unordered_multimap; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_unordered_map_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h index ce6175294d..bfe3a9785f 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h @@ -1,306 +1,306 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
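For reference, a minimal sketch of how the concurrent_unordered_map interface restored above is typically used; this example is not part of the diff, the include path merely follows the oneapi/ layout of this tree, and the names word_count and more as well as all literal values are illustrative only.

#include "oneapi/tbb/concurrent_unordered_map.h"
#include <iostream>
#include <string>

int main() {
    tbb::concurrent_unordered_map<std::string, int> word_count;

    word_count["alpha"] = 1;            // operator[] default-constructs the mapped value for a missing key
    word_count.insert({"beta", 2});     // insert() of a value_type; the P&& overload forwards to emplace()
    word_count.emplace("gamma", 3);

    std::cout << word_count.at("beta") << '\n';   // at() throws when the key is absent

    // C++17 deduction guides declared above: Key and T are deduced from the initializer list
    tbb::concurrent_unordered_map more{std::pair<std::string, int>{"delta", 4},
                                       std::pair<std::string, int>{"epsilon", 5}};

    // merge() splices nodes from the source container; elements whose keys
    // already exist in the destination stay behind in 'more'
    word_count.merge(more);
    return 0;
}

As with the other oneTBB unordered containers, insertion and lookup may run concurrently, and merge() follows the usual node-splicing semantics, so keys that would collide in the destination remain in the source.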
-*/ - -#ifndef __TBB_concurrent_unordered_set_H -#define __TBB_concurrent_unordered_set_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_unordered_base.h" -#include "tbb_allocator.h" - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Key, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> -struct concurrent_unordered_set_traits { - using key_type = Key; - using value_type = key_type; - using allocator_type = Allocator; - using hash_compare_type = hash_compare<key_type, Hash, KeyEqual>; - static constexpr bool allow_multimapping = AllowMultimapping; - - static constexpr const key_type& get_key( const value_type& value ) { - return value; - } -}; // class concurrent_unordered_set_traits - -template <typename Key, typename Hash, typename KeyEqual, typename Allocator> -class concurrent_unordered_multiset; - -template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, - typename Allocator = tbb::tbb_allocator<Key>> -class concurrent_unordered_set - : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>> -{ - using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>; - using base_type = concurrent_unordered_base<traits_type>; -public: - using key_type = typename base_type::key_type; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using hasher = typename base_type::hasher; - using key_equal = typename base_type::key_equal; - using allocator_type = typename base_type::allocator_type; - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - using local_iterator = typename base_type::local_iterator; - using const_local_iterator = typename base_type::const_local_iterator; - using node_type = typename base_type::node_type; - - // Include constructors of base_type; - using base_type::base_type; - using base_type::operator=; - // Required for implicit deduction guides - concurrent_unordered_set() = default; - concurrent_unordered_set( const concurrent_unordered_set& ) = default; - concurrent_unordered_set( const concurrent_unordered_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_unordered_set( concurrent_unordered_set&& ) = default; - concurrent_unordered_set( concurrent_unordered_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_unordered_set& operator=( const concurrent_unordered_set& ) = default; - concurrent_unordered_set& operator=( concurrent_unordered_set&& ) = default; - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multiset<key_type, OtherHash, 
OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_unordered_set - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Hash = std::hash<iterator_value_t<It>>, - typename KeyEq = std::equal_to<iterator_value_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_set( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_set<iterator_value_t<It>, Hash, KeyEq, Alloc>; - -template <typename T, - typename Hash = std::hash<T>, - typename KeyEq = std::equal_to<T>, - typename Alloc = tbb::tbb_allocator<T>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_set( std::initializer_list<T>, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_set<T, Hash, KeyEq, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_set( It, It, std::size_t, Alloc ) --> concurrent_unordered_set<iterator_value_t<It>, std::hash<iterator_value_t<It>>, - std::equal_to<iterator_value_t<It>>, Alloc>; - -template <typename It, typename Hash, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_set( It, It, std::size_t, Hash, Alloc ) --> concurrent_unordered_set<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_set( std::initializer_list<T>, std::size_t, Alloc ) --> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_set( std::initializer_list<T>, Alloc ) --> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; - -template <typename T, typename Hash, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_set( std::initializer_list<T>, std::size_t, Hash, Alloc ) --> concurrent_unordered_set<T, Hash, std::equal_to<T>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Hash, typename KeyEqual, typename Allocator> -void swap( concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& lhs, - concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& rhs ) { - lhs.swap(rhs); -} - -template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = 
std::equal_to<Key>, - typename Allocator = tbb::tbb_allocator<Key>> -class concurrent_unordered_multiset - : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>> -{ - using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>; - using base_type = concurrent_unordered_base<traits_type>; -public: - using key_type = typename base_type::key_type; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using hasher = typename base_type::hasher; - using key_equal = typename base_type::key_equal; - using allocator_type = typename base_type::allocator_type; - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - using local_iterator = typename base_type::local_iterator; - using const_local_iterator = typename base_type::const_local_iterator; - using node_type = typename base_type::node_type; - - // Include constructors of base_type; - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_unordered_multiset() = default; - concurrent_unordered_multiset( const concurrent_unordered_multiset& ) = default; - concurrent_unordered_multiset( const concurrent_unordered_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_unordered_multiset( concurrent_unordered_multiset&& ) = default; - concurrent_unordered_multiset( concurrent_unordered_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_unordered_multiset& operator=( const concurrent_unordered_multiset& ) = default; - concurrent_unordered_multiset& operator=( concurrent_unordered_multiset&& ) = default; - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_unordered_multiset - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename It, - typename Hash = std::hash<iterator_value_t<It>>, - typename KeyEq = std::equal_to<iterator_value_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multiset( It, It, std::size_t = 
{}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_multiset<iterator_value_t<It>, Hash, KeyEq, Alloc>; - -template <typename T, - typename Hash = std::hash<T>, - typename KeyEq = std::equal_to<T>, - typename Alloc = tbb::tbb_allocator<T>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multiset( std::initializer_list<T>, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_multiset<T, Hash, KeyEq, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multiset( It, It, std::size_t, Alloc ) --> concurrent_unordered_multiset<iterator_value_t<It>, std::hash<iterator_value_t<It>>, - std::equal_to<iterator_value_t<It>>, Alloc>; - -template <typename It, typename Hash, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multiset( It, It, std::size_t, Hash, Alloc ) --> concurrent_unordered_multiset<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Alloc ) --> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multiset( std::initializer_list<T>, Alloc ) --> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; - -template <typename T, typename Hash, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Hash, Alloc ) --> concurrent_unordered_multiset<T, Hash, std::equal_to<T>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Hash, typename KeyEqual, typename Allocator> -void swap( concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& lhs, - concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& rhs ) { - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_unordered_set; -using detail::d1::concurrent_unordered_multiset; -using detail::split; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_unordered_set_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_unordered_set_H +#define __TBB_concurrent_unordered_set_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_unordered_base.h" +#include "tbb_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> +struct concurrent_unordered_set_traits { + using key_type = Key; + using value_type = key_type; + using allocator_type = Allocator; + using hash_compare_type = hash_compare<key_type, Hash, KeyEqual>; + static constexpr bool allow_multimapping = AllowMultimapping; + + static constexpr const key_type& get_key( const value_type& value ) { + return value; + } +}; // class concurrent_unordered_set_traits + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +class concurrent_unordered_multiset; + +template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_unordered_set + : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>> +{ + using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + // Required for implicit deduction guides + concurrent_unordered_set() = default; + concurrent_unordered_set( const concurrent_unordered_set& ) = default; + concurrent_unordered_set( const concurrent_unordered_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_set( concurrent_unordered_set&& ) = default; + concurrent_unordered_set( concurrent_unordered_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_set& operator=( const concurrent_unordered_set& ) = default; + concurrent_unordered_set& operator=( concurrent_unordered_set&& ) = default; + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, 
OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_set + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Hash = std::hash<iterator_value_t<It>>, + typename KeyEq = std::equal_to<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_set<iterator_value_t<It>, Hash, KeyEq, Alloc>; + +template <typename T, + typename Hash = std::hash<T>, + typename KeyEq = std::equal_to<T>, + typename Alloc = tbb::tbb_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_set<T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( It, It, std::size_t, Alloc ) +-> concurrent_unordered_set<iterator_value_t<It>, std::hash<iterator_value_t<It>>, + std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_set<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t, Alloc ) +-> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( std::initializer_list<T>, Alloc ) +-> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_set<T, Hash, std::equal_to<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = 
std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_unordered_multiset + : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>> +{ + using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_unordered_multiset() = default; + concurrent_unordered_multiset( const concurrent_unordered_multiset& ) = default; + concurrent_unordered_multiset( const concurrent_unordered_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_multiset( concurrent_unordered_multiset&& ) = default; + concurrent_unordered_multiset( concurrent_unordered_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_multiset& operator=( const concurrent_unordered_multiset& ) = default; + concurrent_unordered_multiset& operator=( concurrent_unordered_multiset&& ) = default; + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_multiset + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Hash = std::hash<iterator_value_t<It>>, + typename KeyEq = std::equal_to<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( It, It, std::size_t = 
{}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multiset<iterator_value_t<It>, Hash, KeyEq, Alloc>; + +template <typename T, + typename Hash = std::hash<T>, + typename KeyEq = std::equal_to<T>, + typename Alloc = tbb::tbb_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multiset<T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( It, It, std::size_t, Alloc ) +-> concurrent_unordered_multiset<iterator_value_t<It>, std::hash<iterator_value_t<It>>, + std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multiset<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Alloc ) +-> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( std::initializer_list<T>, Alloc ) +-> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multiset<T, Hash, std::equal_to<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_unordered_set; +using detail::d1::concurrent_unordered_multiset; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_unordered_set_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h index 94a22b92c6..00295f0d5d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h @@ -1,1114 +1,1114 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
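Similarly, a short sketch of the concurrent_unordered_set / concurrent_unordered_multiset interface restored above (insert, merge between set and multiset, the non-member swap, and a C++17 deduction guide); it is not part of the diff, and the container names and string values are illustrative only.

#include "oneapi/tbb/concurrent_unordered_set.h"
#include <string>

int main() {
    tbb::concurrent_unordered_set<std::string> unique_tags;
    unique_tags.insert("alpha");
    unique_tags.insert("alpha");      // second insert is a no-op: the set keeps keys unique

    tbb::concurrent_unordered_multiset<std::string> all_tags;
    all_tags.insert("alpha");
    all_tags.insert("beta");
    all_tags.insert("beta");          // the multiset keeps duplicate keys

    // merge() moves nodes whose keys are not already present in 'unique_tags';
    // the remaining duplicates stay behind in 'all_tags'
    unique_tags.merge(all_tags);

    // Deduction guide: the element type is deduced from the initializer list
    tbb::concurrent_unordered_set deduced{std::string("x"), std::string("y")};
    swap(unique_tags, deduced);       // non-member swap defined alongside the containers
    return 0;
}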
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_concurrent_vector_H -#define __TBB_concurrent_vector_H - -#include "detail/_namespace_injection.h" -#include "detail/_utils.h" -#include "detail/_assert.h" -#include "detail/_allocator_traits.h" -#include "detail/_segment_table.h" -#include "detail/_containers_helpers.h" -#include "blocked_range.h" -#include "cache_aligned_allocator.h" - -#include <algorithm> -#include <utility> // std::move_if_noexcept -#include <algorithm> -#if __TBB_CPP20_COMPARISONS_PRESENT -#include <compare> -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Vector, typename Value> -class vector_iterator { - using vector_type = Vector; - -public: - using value_type = Value; - using size_type = typename vector_type::size_type; - using difference_type = typename vector_type::difference_type; - using pointer = value_type*; - using reference = value_type&; - using iterator_category = std::random_access_iterator_tag; - - template <typename Vec, typename Val> - friend vector_iterator<Vec, Val> operator+( typename vector_iterator<Vec, Val>::difference_type, const vector_iterator<Vec, Val>& ); - - template <typename Vec, typename Val1, typename Val2> - friend typename vector_iterator<Vec, Val1>::difference_type operator-( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); - - template <typename Vec, typename Val1, typename Val2> - friend bool operator==( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); - - template <typename Vec, typename Val1, typename Val2> - friend bool operator<( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); - - template <typename Vec, typename Val> - friend class vector_iterator; - - template <typename T, typename Allocator> - friend class concurrent_vector; - -private: - vector_iterator( const vector_type& vector, size_type index, value_type* item = nullptr ) - : my_vector(const_cast<vector_type*>(&vector)), my_index(index), my_item(item) - {} - -public: - vector_iterator() : my_vector(nullptr), my_index(~size_type(0)), my_item(nullptr) - {} - - vector_iterator( const vector_iterator<vector_type, typename vector_type::value_type>& other ) - : my_vector(other.my_vector), my_index(other.my_index), my_item(other.my_item) - {} - - vector_iterator& operator=( const vector_iterator<vector_type, typename vector_type::value_type>& other ) { - my_vector = other.my_vector; - my_index = other.my_index; - my_item = other.my_item; - return *this; - } - - vector_iterator operator+( difference_type offset ) const { - return vector_iterator(*my_vector, my_index + offset); - } - - vector_iterator& operator+=( difference_type offset ) { - my_index += offset; - my_item = nullptr; - return *this; - } - - vector_iterator operator-( difference_type offset ) const { - return vector_iterator(*my_vector, my_index - offset); - } - - vector_iterator& operator-=( difference_type offset ) { - my_index -= offset; - my_item = nullptr; - return *this; - } - - reference operator*() const { - value_type *item = my_item; - if (item == nullptr) { - item = &my_vector->internal_subscript(my_index); - } else { - 
__TBB_ASSERT(item == &my_vector->internal_subscript(my_index), "corrupt cache"); - } - return *item; - } - - pointer operator->() const { return &(operator*()); } - - reference operator[]( difference_type k ) const { - return my_vector->internal_subscript(my_index + k); - } - - vector_iterator& operator++() { - ++my_index; - if (my_item != nullptr) { - if (vector_type::is_first_element_in_segment(my_index)) { - // If the iterator crosses a segment boundary, the pointer become invalid - // as possibly next segment is in another memory location - my_item = nullptr; - } else { - ++my_item; - } - } - return *this; - } - - vector_iterator operator++(int) { - vector_iterator result = *this; - ++(*this); - return result; - } - - vector_iterator& operator--() { - __TBB_ASSERT(my_index > 0, "operator--() applied to iterator already at beginning of concurrent_vector"); - --my_index; - if (my_item != nullptr) { - if (vector_type::is_first_element_in_segment(my_index)) { - // If the iterator crosses a segment boundary, the pointer become invalid - // as possibly next segment is in another memory location - my_item = nullptr; - } else { - --my_item; - } - } - return *this; - } - - vector_iterator operator--(int) { - vector_iterator result = *this; - --(*this); - return result; - } - -private: - // concurrent_vector over which we are iterating. - vector_type* my_vector; - - // Index into the vector - size_type my_index; - - // Caches my_vector *it; - // If my_item == nullptr cached value is not available use internal_subscript(my_index) - mutable value_type* my_item; -}; // class vector_iterator - -template <typename Vector, typename T> -vector_iterator<Vector, T> operator+( typename vector_iterator<Vector, T>::difference_type offset, - const vector_iterator<Vector, T>& v ) -{ - return vector_iterator<Vector, T>(*v.my_vector, v.my_index + offset); -} - -template <typename Vector, typename T, typename U> -typename vector_iterator<Vector, T>::difference_type operator-( const vector_iterator<Vector, T>& i, - const vector_iterator<Vector, U>& j ) -{ - using difference_type = typename vector_iterator<Vector, T>::difference_type; - return static_cast<difference_type>(i.my_index) - static_cast<difference_type>(j.my_index); -} - -template <typename Vector, typename T, typename U> -bool operator==( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return i.my_vector == j.my_vector && i.my_index == j.my_index; -} - -template <typename Vector, typename T, typename U> -bool operator!=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return !(i == j); -} - -template <typename Vector, typename T, typename U> -bool operator<( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return i.my_index < j.my_index; -} - -template <typename Vector, typename T, typename U> -bool operator>( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return j < i; -} - -template <typename Vector, typename T, typename U> -bool operator>=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return !(i < j); -} - -template <typename Vector, typename T, typename U> -bool operator<=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return !(j < i); -} - -static constexpr std::size_t embedded_table_num_segments = 3; - -template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> -class concurrent_vector - : private segment_table<T, Allocator, 
concurrent_vector<T, Allocator>, embedded_table_num_segments> -{ - using self_type = concurrent_vector<T, Allocator>; - using base_type = segment_table<T, Allocator, self_type, embedded_table_num_segments>; - - friend class segment_table<T, Allocator, self_type, embedded_table_num_segments>; - - template <typename Iterator> - class generic_range_type : public tbb::blocked_range<Iterator> { - using base_type = tbb::blocked_range<Iterator>; - public: - using value_type = T; - using reference = T&; - using const_reference = const T&; - using iterator = Iterator; - using difference_type = std::ptrdiff_t; - - using base_type::base_type; - - template<typename U> - generic_range_type( const generic_range_type<U>& r) : blocked_range<Iterator>(r.begin(), r.end(), r.grainsize()) {} - generic_range_type( generic_range_type& r, split ) : blocked_range<Iterator>(r, split()) {} - }; // class generic_range_type - - static_assert(std::is_same<T, typename Allocator::value_type>::value, - "value_type of the container must be the same as its allocator's"); - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; - // Segment table for concurrent_vector can be extended - static constexpr bool allow_table_extending = true; - static constexpr bool is_noexcept_assignment = allocator_traits_type::propagate_on_container_move_assignment::value || - allocator_traits_type::is_always_equal::value; - static constexpr bool is_noexcept_swap = allocator_traits_type::propagate_on_container_swap::value || - allocator_traits_type::is_always_equal::value; - -public: - using value_type = T; - using allocator_type = Allocator; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using reference = value_type&; - using const_reference = const value_type&; - - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using iterator = vector_iterator<concurrent_vector, value_type>; - using const_iterator = vector_iterator<concurrent_vector, const value_type>; - using reverse_iterator = std::reverse_iterator<iterator>; - using const_reverse_iterator = std::reverse_iterator<const_iterator>; - - using range_type = generic_range_type<iterator>; - using const_range_type = generic_range_type<const_iterator>; - - concurrent_vector() : concurrent_vector(allocator_type()) {} - - explicit concurrent_vector( const allocator_type& alloc ) noexcept - : base_type(alloc) - {} - - explicit concurrent_vector( size_type count, const value_type& value, - const allocator_type& alloc = allocator_type() ) - : concurrent_vector(alloc) - { - try_call( [&] { - grow_by(count, value); - } ).on_exception( [&] { - base_type::clear(); - }); - } - - explicit concurrent_vector( size_type count, const allocator_type& alloc = allocator_type() ) - : concurrent_vector(alloc) - { - try_call( [&] { - grow_by(count); - } ).on_exception( [&] { - base_type::clear(); - }); - } - - template <typename InputIterator> - concurrent_vector( InputIterator first, InputIterator last, const allocator_type& alloc = allocator_type() ) - : concurrent_vector(alloc) - { - try_call( [&] { - grow_by(first, last); - } ).on_exception( [&] { - base_type::clear(); - }); - } - - concurrent_vector( const concurrent_vector& other ) - : base_type(segment_table_allocator_traits::select_on_container_copy_construction(other.get_allocator())) - { - try_call( [&] { - grow_by(other.begin(), other.end()); - } ).on_exception( [&] { - base_type::clear(); - }); - } - - concurrent_vector( const 
concurrent_vector& other, const allocator_type& alloc ) - : base_type(other, alloc) {} - - concurrent_vector(concurrent_vector&& other) noexcept - : base_type(std::move(other)) - {} - - concurrent_vector( concurrent_vector&& other, const allocator_type& alloc ) - : base_type(std::move(other), alloc) - {} - - concurrent_vector( std::initializer_list<value_type> init, - const allocator_type& alloc = allocator_type() ) - : concurrent_vector(init.begin(), init.end(), alloc) - {} - - ~concurrent_vector() {} - - // Assignment - concurrent_vector& operator=( const concurrent_vector& other ) { - base_type::operator=(other); - return *this; - } - - concurrent_vector& operator=( concurrent_vector&& other ) noexcept(is_noexcept_assignment) { - base_type::operator=(std::move(other)); - return *this; - } - - concurrent_vector& operator=( std::initializer_list<value_type> init ) { - assign(init); - return *this; - } - - void assign( size_type count, const value_type& value ) { - destroy_elements(); - grow_by(count, value); - } - - template <typename InputIterator> - typename std::enable_if<is_input_iterator<InputIterator>::value, void>::type - assign( InputIterator first, InputIterator last ) { - destroy_elements(); - grow_by(first, last); - } - - void assign( std::initializer_list<value_type> init ) { - destroy_elements(); - assign(init.begin(), init.end()); - } - - // Concurrent growth - iterator grow_by( size_type delta ) { - return internal_grow_by_delta(delta); - } - - iterator grow_by( size_type delta, const value_type& value ) { - return internal_grow_by_delta(delta, value); - } - - template <typename ForwardIterator> - typename std::enable_if<is_input_iterator<ForwardIterator>::value, iterator>::type - grow_by( ForwardIterator first, ForwardIterator last ) { - auto delta = std::distance(first, last); - return internal_grow_by_delta(delta, first, last); - } - - iterator grow_by( std::initializer_list<value_type> init ) { - return grow_by(init.begin(), init.end()); - } - - iterator grow_to_at_least( size_type n ) { - return internal_grow_to_at_least(n); - } - iterator grow_to_at_least( size_type n, const value_type& value ) { - return internal_grow_to_at_least(n, value); - } - - iterator push_back( const value_type& item ) { - return internal_emplace_back(item); - } - - iterator push_back( value_type&& item ) { - return internal_emplace_back(std::move(item)); - } - - template <typename... Args> - iterator emplace_back( Args&&... 
args ) { - return internal_emplace_back(std::forward<Args>(args)...); - } - - // Items access - reference operator[]( size_type index ) { - return internal_subscript(index); - } - const_reference operator[]( size_type index ) const { - return internal_subscript(index); - } - - reference at( size_type index ) { - return internal_subscript_with_exceptions(index); - } - const_reference at( size_type index ) const { - return internal_subscript_with_exceptions(index); - } - - // Get range for iterating with parallel algorithms - range_type range( size_t grainsize = 1 ) { - return range_type(begin(), end(), grainsize); - } - - // Get const range for iterating with parallel algorithms - const_range_type range( size_t grainsize = 1 ) const { - return const_range_type(begin(), end(), grainsize); - } - - reference front() { - return internal_subscript(0); - } - - const_reference front() const { - return internal_subscript(0); - } - - reference back() { - return internal_subscript(size() - 1); - } - - const_reference back() const { - return internal_subscript(size() - 1); - } - - // Iterators - iterator begin() { return iterator(*this, 0); } - const_iterator begin() const { return const_iterator(*this, 0); } - const_iterator cbegin() const { return const_iterator(*this, 0); } - - iterator end() { return iterator(*this, size()); } - const_iterator end() const { return const_iterator(*this, size()); } - const_iterator cend() const { return const_iterator(*this, size()); } - - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } - - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } - - allocator_type get_allocator() const { - return base_type::get_allocator(); - } - - // Storage - bool empty() const noexcept { - return 0 == size(); - } - - size_type size() const noexcept { - return std::min(this->my_size.load(std::memory_order_acquire), capacity()); - } - - size_type max_size() const noexcept { - return allocator_traits_type::max_size(base_type::get_allocator()); - } - - size_type capacity() const noexcept { - return base_type::capacity(); - } - - void reserve( size_type n ) { - if (n == 0) return; - - if (n > max_size()) { - tbb::detail::throw_exception(exception_id::reservation_length_error); - } - - this->assign_first_block_if_necessary(this->segment_index_of(n - 1) + 1); - base_type::reserve(n); - } - - void resize( size_type n ) { - internal_resize(n); - } - - void resize( size_type n, const value_type& val ) { - internal_resize(n, val); - } - - void shrink_to_fit() { - internal_compact(); - } - - void swap(concurrent_vector& other) noexcept(is_noexcept_swap) { - base_type::swap(other); - } - - void clear() { - destroy_elements(); - } - -private: - using segment_type = typename base_type::segment_type; - using segment_table_type = typename base_type::segment_table_type; - using segment_table_allocator_traits = typename base_type::segment_table_allocator_traits; - using segment_index_type = typename base_type::segment_index_type; - - using segment_element_type = typename base_type::value_type; - using segment_element_allocator_type = typename allocator_traits_type::template rebind_alloc<segment_element_type>; - using segment_element_allocator_traits = 
tbb::detail::allocator_traits<segment_element_allocator_type>; - - segment_table_type allocate_long_table( const typename base_type::atomic_segment* embedded_table, size_type start_index ) { - __TBB_ASSERT(start_index <= this->embedded_table_size, "Start index out of embedded table"); - - // If other threads are trying to set pointers in the short segment, wait for them to finish their - // assignments before we copy the short segment to the long segment. Note: grow_to_at_least depends on it - for (segment_index_type i = 0; this->segment_base(i) < start_index; ++i) { - spin_wait_while_eq(embedded_table[i], segment_type(nullptr)); - } - - // It is possible that the table was extend by a thread allocating first_block, need to check this. - if (this->get_table() != embedded_table) { - return nullptr; - } - - // Allocate long segment table and fill with null pointers - segment_table_type new_segment_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), this->pointers_per_long_table); - // Copy segment pointers from the embedded table - for (size_type segment_index = 0; segment_index < this->pointers_per_embedded_table; ++segment_index) { - segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], - embedded_table[segment_index].load(std::memory_order_relaxed)); - } - for (size_type segment_index = this->pointers_per_embedded_table; segment_index < this->pointers_per_long_table; ++segment_index) { - segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], nullptr); - } - - return new_segment_table; - } - - // create_segment function is required by the segment_table base class - segment_type create_segment( segment_table_type table, segment_index_type seg_index, size_type index ) { - size_type first_block = this->my_first_block.load(std::memory_order_relaxed); - // First block allocation - if (seg_index < first_block) { - // If 0 segment is already allocated, then it remains to wait until the segments are filled to requested - if (table[0].load(std::memory_order_acquire) != nullptr) { - spin_wait_while_eq(table[seg_index], segment_type(nullptr)); - return nullptr; - } - - segment_element_allocator_type segment_allocator(base_type::get_allocator()); - segment_type new_segment = nullptr; - size_type first_block_size = this->segment_size(first_block); - try_call( [&] { - new_segment = segment_element_allocator_traits::allocate(segment_allocator, first_block_size); - } ).on_exception( [&] { - segment_type disabled_segment = nullptr; - if (table[0].compare_exchange_strong(disabled_segment, this->segment_allocation_failure_tag)) { - size_type end_segment = table == this->my_embedded_table ? this->pointers_per_embedded_table : first_block; - for (size_type i = 1; i < end_segment; ++i) { - table[i].store(this->segment_allocation_failure_tag, std::memory_order_release); - } - } - }); - - segment_type disabled_segment = nullptr; - if (table[0].compare_exchange_strong(disabled_segment, new_segment)) { - this->extend_table_if_necessary(table, 0, first_block_size); - for (size_type i = 1; i < first_block; ++i) { - table[i].store(new_segment, std::memory_order_release); - } - - // Other threads can wait on a snapshot of an embedded table, need to fill it. 
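The create_segment path shown here is what makes unsynchronized growth safe: several threads may race to allocate a segment, but only one allocation is published and the rest are discarded. From the caller's side this is invisible; a minimal standalone usage sketch (assuming oneTBB headers are available and the program links against -ltbb; thread and element counts are arbitrary):

    // Minimal concurrent-growth sketch; not part of this header.
    #include <oneapi/tbb/concurrent_vector.h>
    #include <thread>
    #include <vector>
    #include <cassert>

    int main() {
        tbb::concurrent_vector<int> v;
        std::vector<std::thread> workers;
        for (int t = 0; t < 4; ++t) {
            workers.emplace_back([&v, t] {
                for (int i = 0; i < 1000; ++i) {
                    v.push_back(t * 1000 + i);  // may race into create_segment; only one allocation wins
                }
            });
        }
        for (auto& w : workers) w.join();
        assert(v.size() == 4000);               // every element is stored; interleaving order is unspecified
        return 0;
    }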
- for (size_type i = 1; i < first_block && i < this->pointers_per_embedded_table; ++i) { - this->my_embedded_table[i].store(new_segment, std::memory_order_release); - } - } else if (new_segment != this->segment_allocation_failure_tag) { - // Deallocate the memory - segment_element_allocator_traits::deallocate(segment_allocator, new_segment, first_block_size); - // 0 segment is already allocated, then it remains to wait until the segments are filled to requested - spin_wait_while_eq(table[seg_index], segment_type(nullptr)); - } - } else { - size_type offset = this->segment_base(seg_index); - if (index == offset) { - __TBB_ASSERT(table[seg_index].load(std::memory_order_relaxed) == nullptr, "Only this thread can enable this segment"); - segment_element_allocator_type segment_allocator(base_type::get_allocator()); - segment_type new_segment = this->segment_allocation_failure_tag; - try_call( [&] { - new_segment = segment_element_allocator_traits::allocate(segment_allocator,this->segment_size(seg_index)); - // Shift base address to simplify access by index - new_segment -= this->segment_base(seg_index); - } ).on_completion( [&] { - table[seg_index].store(new_segment, std::memory_order_release); - }); - } else { - spin_wait_while_eq(table[seg_index], segment_type(nullptr)); - } - } - return nullptr; - } - - // Returns the number of elements in the segment to be destroy - size_type number_of_elements_in_segment( segment_index_type seg_index ) { - size_type curr_vector_size = this->my_size.load(std::memory_order_relaxed); - size_type curr_segment_base = this->segment_base(seg_index); - - if (seg_index == 0) { - return std::min(curr_vector_size, this->segment_size(seg_index)); - } else { - // Perhaps the segment is allocated, but there are no elements in it. - if (curr_vector_size < curr_segment_base) { - return 0; - } - return curr_segment_base * 2 > curr_vector_size ? curr_vector_size - curr_segment_base : curr_segment_base; - } - } - - void deallocate_segment( segment_type address, segment_index_type seg_index ) { - segment_element_allocator_type segment_allocator(base_type::get_allocator()); - size_type first_block = this->my_first_block.load(std::memory_order_relaxed); - if (seg_index >= first_block) { - segment_element_allocator_traits::deallocate(segment_allocator, address, this->segment_size(seg_index)); - } - else if (seg_index == 0) { - size_type elements_to_deallocate = first_block > 0 ? 
this->segment_size(first_block) : this->segment_size(0); - segment_element_allocator_traits::deallocate(segment_allocator, address, elements_to_deallocate); - } - } - - // destroy_segment function is required by the segment_table base class - void destroy_segment( segment_type address, segment_index_type seg_index ) { - size_type elements_to_destroy = number_of_elements_in_segment(seg_index); - segment_element_allocator_type segment_allocator(base_type::get_allocator()); - - for (size_type i = 0; i < elements_to_destroy; ++i) { - segment_element_allocator_traits::destroy(segment_allocator, address + i); - } - - deallocate_segment(address, seg_index); - } - - // copy_segment function is required by the segment_table base class - void copy_segment( segment_index_type seg_index, segment_type from, segment_type to ) { - size_type i = 0; - try_call( [&] { - for (; i != number_of_elements_in_segment(seg_index); ++i) { - segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, from[i]); - } - } ).on_exception( [&] { - // Zero-initialize items left not constructed after the exception - zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); - - segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); - auto table = this->get_table(); - for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { - auto curr_segment = table[j].load(std::memory_order_relaxed); - if (curr_segment) { - zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); - } - } - this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); - }); - } - - // move_segment function is required by the segment_table base class - void move_segment( segment_index_type seg_index, segment_type from, segment_type to ) { - size_type i = 0; - try_call( [&] { - for (; i != number_of_elements_in_segment(seg_index); ++i) { - segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, std::move(from[i])); - } - } ).on_exception( [&] { - // Zero-initialize items left not constructed after the exception - zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); - - segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); - auto table = this->get_table(); - for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { - auto curr_segment = table[j].load(std::memory_order_relaxed); - if (curr_segment) { - zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); - } - } - this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); - }); - } - - static constexpr bool is_first_element_in_segment( size_type index ) { - // An element is the first in a segment if its index is equal to a power of two - return is_power_of_two_at_least(index, 2); - } - - const_reference internal_subscript( size_type index ) const { - return const_cast<self_type*>(this)->internal_subscript(index); - } - - reference internal_subscript( size_type index ) { - __TBB_ASSERT(index < this->my_size.load(std::memory_order_relaxed), "Invalid subscript index"); - return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); - } - - const_reference internal_subscript_with_exceptions( size_type index ) const { - return const_cast<self_type*>(this)->internal_subscript_with_exceptions(index); - } - - reference 
internal_subscript_with_exceptions( size_type index ) { - if (index >= this->my_size.load(std::memory_order_acquire)) { - tbb::detail::throw_exception(exception_id::out_of_range); - } - - segment_table_type table = this->my_segment_table.load(std::memory_order_acquire); - - size_type seg_index = this->segment_index_of(index); - if (base_type::number_of_segments(table) < seg_index) { - tbb::detail::throw_exception(exception_id::out_of_range); - } - - if (table[seg_index] <= this->segment_allocation_failure_tag) { - tbb::detail::throw_exception(exception_id::out_of_range); - } - - return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); - } - - static void zero_unconstructed_elements( pointer start, size_type count ) { - std::memset(static_cast<void *>(start), 0, count * sizeof(value_type)); - } - - template <typename... Args> - iterator internal_emplace_back( Args&&... args ) { - size_type old_size = this->my_size++; - this->assign_first_block_if_necessary(default_first_block_size); - auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(old_size); - - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard([&] { - zero_unconstructed_elements(element_address, /*count =*/1); - }); - - segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, std::forward<Args>(args)...); - value_guard.dismiss(); - return iterator(*this, old_size, element_address); - } - - template <typename... Args> - void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, const Args&... args ) { - static_assert(sizeof...(Args) < 2, "Too many parameters"); - for (size_type idx = start_idx; idx < end_idx; ++idx) { - auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard( [&] { - segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); - size_type segment_size = this->segment_size(last_allocated_segment); - end_idx = end_idx < segment_size ? end_idx : segment_size; - for (size_type i = idx; i < end_idx; ++i) { - zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); - } - }); - segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, args...); - value_guard.dismiss(); - } - } - - template <typename ForwardIterator> - void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, ForwardIterator first, ForwardIterator ) { - for (size_type idx = start_idx; idx < end_idx; ++idx) { - auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); - try_call( [&] { - segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, *first++); - } ).on_exception( [&] { - segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); - size_type segment_size = this->segment_size(last_allocated_segment); - end_idx = end_idx < segment_size ? end_idx : segment_size; - for (size_type i = idx; i < end_idx; ++i) { - zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); - } - }); - } - } - - template <typename... Args> - iterator internal_grow( size_type start_idx, size_type end_idx, const Args&... 
args ) { - this->assign_first_block_if_necessary(this->segment_index_of(end_idx - 1) + 1); - size_type seg_index = this->segment_index_of(end_idx - 1); - segment_table_type table = this->get_table(); - this->extend_table_if_necessary(table, start_idx, end_idx); - - if (seg_index > this->my_first_block.load(std::memory_order_relaxed)) { - // So that other threads be able to work with the last segment of grow_by, allocate it immediately. - // If the last segment is not less than the first block - if (table[seg_index].load(std::memory_order_relaxed) == nullptr) { - size_type first_element = this->segment_base(seg_index); - if (first_element >= start_idx && first_element < end_idx) { - segment_type segment = table[seg_index].load(std::memory_order_relaxed); - base_type::enable_segment(segment, table, seg_index, first_element); - } - } - } - - internal_loop_construct(table, start_idx, end_idx, args...); - - return iterator(*this, start_idx, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(start_idx)); - } - - - template <typename... Args> - iterator internal_grow_by_delta( size_type delta, const Args&... args ) { - if (delta == size_type(0)) { - return end(); - } - size_type start_idx = this->my_size.fetch_add(delta); - size_type end_idx = start_idx + delta; - - return internal_grow(start_idx, end_idx, args...); - } - - template <typename... Args> - iterator internal_grow_to_at_least( size_type new_size, const Args&... args ) { - size_type old_size = this->my_size.load(std::memory_order_relaxed); - if (new_size == size_type(0)) return iterator(*this, 0); - while (old_size < new_size && !this->my_size.compare_exchange_weak(old_size, new_size)) - {} - - int delta = static_cast<int>(new_size) - static_cast<int>(old_size); - if (delta > 0) { - return internal_grow(old_size, new_size, args...); - } - - size_type end_segment = this->segment_index_of(new_size - 1); - - // Check/wait for segments allocation completes - if (end_segment >= this->pointers_per_embedded_table && - this->get_table() == this->my_embedded_table) - { - spin_wait_while_eq(this->my_segment_table, this->my_embedded_table); - } - - for (segment_index_type seg_idx = 0; seg_idx <= end_segment; ++seg_idx) { - if (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { - atomic_backoff backoff(true); - while (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { - backoff.pause(); - } - } - } - - #if TBB_USE_DEBUG - size_type cap = capacity(); - __TBB_ASSERT( cap >= new_size, NULL); - #endif - return iterator(*this, size()); - } - - template <typename... Args> - void internal_resize( size_type n, const Args&... 
args ) { - if (n == 0) { - clear(); - return; - } - - size_type old_size = this->my_size.load(std::memory_order_acquire); - if (n > old_size) { - reserve(n); - grow_to_at_least(n, args...); - } else { - if (old_size == n) { - return; - } - size_type last_segment = this->segment_index_of(old_size - 1); - // Delete segments - for (size_type seg_idx = this->segment_index_of(n - 1) + 1; seg_idx <= last_segment; ++seg_idx) { - this->delete_segment(seg_idx); - } - - // If n > segment_size(n) => we need to destroy all of the items in the first segment - // Otherwise, we need to destroy only items with the index < n - size_type n_segment = this->segment_index_of(n - 1); - size_type last_index_to_destroy = std::min(this->segment_base(n_segment) + this->segment_size(n_segment), old_size); - // Destroy elements in curr segment - for (size_type idx = n; idx < last_index_to_destroy; ++idx) { - segment_table_allocator_traits::destroy(base_type::get_allocator(), &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(idx)); - } - this->my_size.store(n, std::memory_order_release); - } - } - - void destroy_elements() { - allocator_type alloc(base_type::get_allocator()); - for (size_type i = 0; i < this->my_size.load(std::memory_order_relaxed); ++i) { - allocator_traits_type::destroy(alloc, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(i)); - } - this->my_size.store(0, std::memory_order_relaxed); - } - - static bool incompact_predicate( size_type size ) { - // memory page size - const size_type page_size = 4096; - return size < page_size || ((size - 1) % page_size < page_size / 2 && size < page_size * 128); - } - - void internal_compact() { - const size_type curr_size = this->my_size.load(std::memory_order_relaxed); - segment_table_type table = this->get_table(); - const segment_index_type k_end = this->find_last_allocated_segment(table); // allocated segments - const segment_index_type k_stop = curr_size ? this->segment_index_of(curr_size - 1) + 1 : 0; // number of segments to store existing items: 0=>0; 1,2=>1; 3,4=>2; [5-8]=>3;.. 
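The k_stop comment above encodes the power-of-two segment layout: element indices 0-1 live in segment 0, 2-3 in segment 1, 4-7 in segment 2, and so on, so n existing items occupy segment_index_of(n - 1) + 1 segments. A standalone sketch of that arithmetic, using hypothetical helper names rather than the private segment_table members:

    // Hedged re-derivation of the table in the comment: 0 -> 0 segments, 1-2 -> 1, 3-4 -> 2, 5-8 -> 3, ...
    #include <cassert>
    #include <cstddef>

    // Hypothetical helpers for illustration only.
    static std::size_t segment_index_of(std::size_t element_index) {
        std::size_t k = 0;
        while ((std::size_t(2) << k) <= element_index) ++k;  // segment k >= 1 covers [2^k, 2^(k+1))
        return k;
    }

    static std::size_t segments_for(std::size_t n) {
        return n ? segment_index_of(n - 1) + 1 : 0;
    }

    int main() {
        assert(segments_for(0) == 0);
        assert(segments_for(1) == 1 && segments_for(2) == 1);
        assert(segments_for(3) == 2 && segments_for(4) == 2);
        assert(segments_for(5) == 3 && segments_for(8) == 3);
        return 0;
    }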
- const segment_index_type first_block = this->my_first_block; // number of merged segments, getting values from atomics - - segment_index_type k = first_block; - if (k_stop < first_block) { - k = k_stop; - } - else { - while (k < k_stop && incompact_predicate(this->segment_size(k) * sizeof(value_type))) k++; - } - - if (k_stop == k_end && k == first_block) { - return; - } - - // First segment optimization - if (k != first_block && k) { - size_type max_block = std::max(first_block, k); - - auto buffer_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), max_block); - - for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { - segment_table_allocator_traits::construct(base_type::get_allocator(), &buffer_table[seg_idx], - table[seg_idx].load(std::memory_order_relaxed)); - table[seg_idx].store(nullptr, std::memory_order_relaxed); - } - - this->my_first_block.store(k, std::memory_order_relaxed); - size_type index = 0; - try_call( [&] { - for (; index < std::min(this->segment_size(max_block), curr_size); ++index) { - auto element_address = &static_cast<base_type*>(this)->operator[](index); - segment_index_type seg_idx = this->segment_index_of(index); - segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, - std::move_if_noexcept(buffer_table[seg_idx].load(std::memory_order_relaxed)[index])); - } - } ).on_exception( [&] { - segment_element_allocator_type allocator(base_type::get_allocator()); - for (size_type i = 0; i < index; ++i) { - auto element_adress = &this->operator[](i); - segment_element_allocator_traits::destroy(allocator, element_adress); - } - segment_element_allocator_traits::deallocate(allocator, - table[0].load(std::memory_order_relaxed), this->segment_size(max_block)); - - for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { - table[seg_idx].store(buffer_table[seg_idx].load(std::memory_order_relaxed), - std::memory_order_relaxed); - buffer_table[seg_idx].store(nullptr, std::memory_order_relaxed); - } - segment_table_allocator_traits::deallocate(base_type::get_allocator(), - buffer_table, max_block); - this->my_first_block.store(first_block, std::memory_order_relaxed); - }); - - // Need to correct deallocate old segments - // Method destroy_segment respect active first_block, therefore, - // in order for the segment deletion to work correctly, set the first_block size that was earlier, - // destroy the unnecessary segments. 
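That ordering concern only matters inside the compaction itself; from the outside, the visible effect of shrink_to_fit() is that excess segments left over from reserve() are released. A small single-threaded sketch (shrink_to_fit, like reserve, is not meant to run concurrently with other operations; exact capacity values depend on the segment layout, so they are printed rather than asserted):

    // Single-threaded sketch of reserve() followed by shrink_to_fit().
    #include <oneapi/tbb/concurrent_vector.h>
    #include <iostream>

    int main() {
        tbb::concurrent_vector<int> v;
        v.reserve(1000);                          // may leave extra segments allocated
        for (int i = 0; i < 10; ++i) v.push_back(i);
        std::cout << "capacity before shrink: " << v.capacity() << '\n';
        v.shrink_to_fit();                        // releases segments not needed for the 10 stored items
        std::cout << "capacity after shrink:  " << v.capacity() << '\n';
        return 0;
    }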
- this->my_first_block.store(first_block, std::memory_order_relaxed); - for (size_type seg_idx = max_block; seg_idx > 0 ; --seg_idx) { - auto curr_segment = buffer_table[seg_idx - 1].load(std::memory_order_relaxed); - if (curr_segment != nullptr) { - destroy_segment(buffer_table[seg_idx - 1].load(std::memory_order_relaxed) + this->segment_base(seg_idx - 1), - seg_idx - 1); - } - } - - this->my_first_block.store(k, std::memory_order_relaxed); - - for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { - segment_table_allocator_traits::destroy(base_type::get_allocator(), &buffer_table[seg_idx]); - } - - segment_table_allocator_traits::deallocate(base_type::get_allocator(), buffer_table, max_block); - } - // free unnecessary segments allocated by reserve() call - if (k_stop < k_end) { - for (size_type seg_idx = k_end; seg_idx != k_stop; --seg_idx) { - if (table[seg_idx - 1].load(std::memory_order_relaxed) != nullptr) { - this->delete_segment(seg_idx - 1); - } - } - if (!k) this->my_first_block.store(0, std::memory_order_relaxed);; - } - } - - // Lever for adjusting the size of first_block at the very first insertion. - // TODO: consider >1 value, check performance - static constexpr size_type default_first_block_size = 1; - - template <typename Vector, typename Value> - friend class vector_iterator; -}; // class concurrent_vector - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -// Deduction guide for the constructor from two iterators -template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_vector( It, It, Alloc = Alloc() ) --> concurrent_vector<iterator_value_t<It>, Alloc>; -#endif - -template <typename T, typename Allocator> -void swap(concurrent_vector<T, Allocator> &lhs, - concurrent_vector<T, Allocator> &rhs) -{ - lhs.swap(rhs); -} - -template <typename T, typename Allocator> -bool operator==(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); -} - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template <typename T, typename Allocator> -bool operator!=(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return !(lhs == rhs); -} -#endif // !__TBB_CPP20_COMPARISONS_PRESENT - -#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT -template <typename T, typename Allocator> -tbb::detail::synthesized_three_way_result<typename concurrent_vector<T, Allocator>::value_type> -operator<=>(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), - rhs.begin(), rhs.end(), - tbb::detail::synthesized_three_way_comparator{}); -} - -#else - -template <typename T, typename Allocator> -bool operator<(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); -} - -template <typename T, typename Allocator> -bool operator<=(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return !(rhs < lhs); -} - -template <typename T, typename Allocator> -bool operator>(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return rhs < lhs; -} - -template <typename T, typename 
Allocator> -bool operator>=(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return !(lhs < rhs); -} -#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::concurrent_vector; -} // namespace v1 - -} // namespace tbb - -#endif // __TBB_concurrent_vector_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_vector_H +#define __TBB_concurrent_vector_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_assert.h" +#include "detail/_allocator_traits.h" +#include "detail/_segment_table.h" +#include "detail/_containers_helpers.h" +#include "blocked_range.h" +#include "cache_aligned_allocator.h" + +#include <algorithm> +#include <utility> // std::move_if_noexcept +#include <algorithm> +#if __TBB_CPP20_COMPARISONS_PRESENT +#include <compare> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Vector, typename Value> +class vector_iterator { + using vector_type = Vector; + +public: + using value_type = Value; + using size_type = typename vector_type::size_type; + using difference_type = typename vector_type::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::random_access_iterator_tag; + + template <typename Vec, typename Val> + friend vector_iterator<Vec, Val> operator+( typename vector_iterator<Vec, Val>::difference_type, const vector_iterator<Vec, Val>& ); + + template <typename Vec, typename Val1, typename Val2> + friend typename vector_iterator<Vec, Val1>::difference_type operator-( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val1, typename Val2> + friend bool operator==( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val1, typename Val2> + friend bool operator<( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val> + friend class vector_iterator; + + template <typename T, typename Allocator> + friend class concurrent_vector; + +private: + vector_iterator( const vector_type& vector, size_type index, value_type* item = nullptr ) + : my_vector(const_cast<vector_type*>(&vector)), my_index(index), my_item(item) + {} + +public: + vector_iterator() : my_vector(nullptr), my_index(~size_type(0)), my_item(nullptr) + {} + + vector_iterator( const vector_iterator<vector_type, typename vector_type::value_type>& other ) + : my_vector(other.my_vector), my_index(other.my_index), my_item(other.my_item) + {} + + vector_iterator& operator=( const vector_iterator<vector_type, typename vector_type::value_type>& other ) { + my_vector = other.my_vector; + my_index = other.my_index; + my_item = other.my_item; + return *this; + } + + vector_iterator operator+( difference_type offset ) const 
{
+        return vector_iterator(*my_vector, my_index + offset);
+    }
+
+    vector_iterator& operator+=( difference_type offset ) {
+        my_index += offset;
+        my_item = nullptr;
+        return *this;
+    }
+
+    vector_iterator operator-( difference_type offset ) const {
+        return vector_iterator(*my_vector, my_index - offset);
+    }
+
+    vector_iterator& operator-=( difference_type offset ) {
+        my_index -= offset;
+        my_item = nullptr;
+        return *this;
+    }
+
+    reference operator*() const {
+        value_type *item = my_item;
+        if (item == nullptr) {
+            item = &my_vector->internal_subscript(my_index);
+        } else {
+            __TBB_ASSERT(item == &my_vector->internal_subscript(my_index), "corrupt cache");
+        }
+        return *item;
+    }
+
+    pointer operator->() const { return &(operator*()); }
+
+    reference operator[]( difference_type k ) const {
+        return my_vector->internal_subscript(my_index + k);
+    }
+
+    vector_iterator& operator++() {
+        ++my_index;
+        if (my_item != nullptr) {
+            if (vector_type::is_first_element_in_segment(my_index)) {
+                // If the iterator crosses a segment boundary, the cached pointer becomes invalid
+                // because the next segment may live in a different memory location
+                my_item = nullptr;
+            } else {
+                ++my_item;
+            }
+        }
+        return *this;
+    }
+
+    vector_iterator operator++(int) {
+        vector_iterator result = *this;
+        ++(*this);
+        return result;
+    }
+
+    vector_iterator& operator--() {
+        __TBB_ASSERT(my_index > 0, "operator--() applied to iterator already at beginning of concurrent_vector");
+        --my_index;
+        if (my_item != nullptr) {
+            if (vector_type::is_first_element_in_segment(my_index)) {
+                // If the iterator crosses a segment boundary, the cached pointer becomes invalid
+                // because the previous segment may live in a different memory location
+                my_item = nullptr;
+            } else {
+                --my_item;
+            }
+        }
+        return *this;
+    }
+
+    vector_iterator operator--(int) {
+        vector_iterator result = *this;
+        --(*this);
+        return result;
+    }
+
+private:
+    // concurrent_vector over which we are iterating.
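Because the cached element pointer is only ever advanced within one segment, and growth never relocates already-published segments, pointers, references and iterators obtained before a grow_by or push_back stay valid afterwards (clear() and shrink_to_fit(), by contrast, may move or destroy elements). A short standalone sketch of that guarantee, with arbitrary sizes:

    // Growth allocates new segments; it does not move existing elements.
    #include <oneapi/tbb/concurrent_vector.h>
    #include <cassert>

    int main() {
        tbb::concurrent_vector<int> v;
        v.push_back(42);
        int* p = &v[0];             // element lives in the first segment
        auto it = v.begin();
        v.grow_by(10000);           // further segments are allocated elsewhere
        assert(p == &v[0]);         // the old pointer still refers to the same element
        assert(*p == 42 && *it == 42);
        return 0;
    }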
+ vector_type* my_vector; + + // Index into the vector + size_type my_index; + + // Caches my_vector *it; + // If my_item == nullptr cached value is not available use internal_subscript(my_index) + mutable value_type* my_item; +}; // class vector_iterator + +template <typename Vector, typename T> +vector_iterator<Vector, T> operator+( typename vector_iterator<Vector, T>::difference_type offset, + const vector_iterator<Vector, T>& v ) +{ + return vector_iterator<Vector, T>(*v.my_vector, v.my_index + offset); +} + +template <typename Vector, typename T, typename U> +typename vector_iterator<Vector, T>::difference_type operator-( const vector_iterator<Vector, T>& i, + const vector_iterator<Vector, U>& j ) +{ + using difference_type = typename vector_iterator<Vector, T>::difference_type; + return static_cast<difference_type>(i.my_index) - static_cast<difference_type>(j.my_index); +} + +template <typename Vector, typename T, typename U> +bool operator==( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return i.my_vector == j.my_vector && i.my_index == j.my_index; +} + +template <typename Vector, typename T, typename U> +bool operator!=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(i == j); +} + +template <typename Vector, typename T, typename U> +bool operator<( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return i.my_index < j.my_index; +} + +template <typename Vector, typename T, typename U> +bool operator>( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return j < i; +} + +template <typename Vector, typename T, typename U> +bool operator>=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(i < j); +} + +template <typename Vector, typename T, typename U> +bool operator<=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(j < i); +} + +static constexpr std::size_t embedded_table_num_segments = 3; + +template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_vector + : private segment_table<T, Allocator, concurrent_vector<T, Allocator>, embedded_table_num_segments> +{ + using self_type = concurrent_vector<T, Allocator>; + using base_type = segment_table<T, Allocator, self_type, embedded_table_num_segments>; + + friend class segment_table<T, Allocator, self_type, embedded_table_num_segments>; + + template <typename Iterator> + class generic_range_type : public tbb::blocked_range<Iterator> { + using base_type = tbb::blocked_range<Iterator>; + public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + using iterator = Iterator; + using difference_type = std::ptrdiff_t; + + using base_type::base_type; + + template<typename U> + generic_range_type( const generic_range_type<U>& r) : blocked_range<Iterator>(r.begin(), r.end(), r.grainsize()) {} + generic_range_type( generic_range_type& r, split ) : blocked_range<Iterator>(r, split()) {} + }; // class generic_range_type + + static_assert(std::is_same<T, typename Allocator::value_type>::value, + "value_type of the container must be the same as its allocator's"); + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + // Segment table for concurrent_vector can be extended + static constexpr bool allow_table_extending = true; + static constexpr bool is_noexcept_assignment = allocator_traits_type::propagate_on_container_move_assignment::value || + 
allocator_traits_type::is_always_equal::value; + static constexpr bool is_noexcept_swap = allocator_traits_type::propagate_on_container_swap::value || + allocator_traits_type::is_always_equal::value; + +public: + using value_type = T; + using allocator_type = Allocator; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using const_reference = const value_type&; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = vector_iterator<concurrent_vector, value_type>; + using const_iterator = vector_iterator<concurrent_vector, const value_type>; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + + using range_type = generic_range_type<iterator>; + using const_range_type = generic_range_type<const_iterator>; + + concurrent_vector() : concurrent_vector(allocator_type()) {} + + explicit concurrent_vector( const allocator_type& alloc ) noexcept + : base_type(alloc) + {} + + explicit concurrent_vector( size_type count, const value_type& value, + const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(count, value); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + explicit concurrent_vector( size_type count, const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(count); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + template <typename InputIterator> + concurrent_vector( InputIterator first, InputIterator last, const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(first, last); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + concurrent_vector( const concurrent_vector& other ) + : base_type(segment_table_allocator_traits::select_on_container_copy_construction(other.get_allocator())) + { + try_call( [&] { + grow_by(other.begin(), other.end()); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + concurrent_vector( const concurrent_vector& other, const allocator_type& alloc ) + : base_type(other, alloc) {} + + concurrent_vector(concurrent_vector&& other) noexcept + : base_type(std::move(other)) + {} + + concurrent_vector( concurrent_vector&& other, const allocator_type& alloc ) + : base_type(std::move(other), alloc) + {} + + concurrent_vector( std::initializer_list<value_type> init, + const allocator_type& alloc = allocator_type() ) + : concurrent_vector(init.begin(), init.end(), alloc) + {} + + ~concurrent_vector() {} + + // Assignment + concurrent_vector& operator=( const concurrent_vector& other ) { + base_type::operator=(other); + return *this; + } + + concurrent_vector& operator=( concurrent_vector&& other ) noexcept(is_noexcept_assignment) { + base_type::operator=(std::move(other)); + return *this; + } + + concurrent_vector& operator=( std::initializer_list<value_type> init ) { + assign(init); + return *this; + } + + void assign( size_type count, const value_type& value ) { + destroy_elements(); + grow_by(count, value); + } + + template <typename InputIterator> + typename std::enable_if<is_input_iterator<InputIterator>::value, void>::type + assign( InputIterator first, InputIterator last ) { + destroy_elements(); + grow_by(first, last); + } + + void assign( std::initializer_list<value_type> init ) { + destroy_elements(); + assign(init.begin(), 
init.end()); + } + + // Concurrent growth + iterator grow_by( size_type delta ) { + return internal_grow_by_delta(delta); + } + + iterator grow_by( size_type delta, const value_type& value ) { + return internal_grow_by_delta(delta, value); + } + + template <typename ForwardIterator> + typename std::enable_if<is_input_iterator<ForwardIterator>::value, iterator>::type + grow_by( ForwardIterator first, ForwardIterator last ) { + auto delta = std::distance(first, last); + return internal_grow_by_delta(delta, first, last); + } + + iterator grow_by( std::initializer_list<value_type> init ) { + return grow_by(init.begin(), init.end()); + } + + iterator grow_to_at_least( size_type n ) { + return internal_grow_to_at_least(n); + } + iterator grow_to_at_least( size_type n, const value_type& value ) { + return internal_grow_to_at_least(n, value); + } + + iterator push_back( const value_type& item ) { + return internal_emplace_back(item); + } + + iterator push_back( value_type&& item ) { + return internal_emplace_back(std::move(item)); + } + + template <typename... Args> + iterator emplace_back( Args&&... args ) { + return internal_emplace_back(std::forward<Args>(args)...); + } + + // Items access + reference operator[]( size_type index ) { + return internal_subscript(index); + } + const_reference operator[]( size_type index ) const { + return internal_subscript(index); + } + + reference at( size_type index ) { + return internal_subscript_with_exceptions(index); + } + const_reference at( size_type index ) const { + return internal_subscript_with_exceptions(index); + } + + // Get range for iterating with parallel algorithms + range_type range( size_t grainsize = 1 ) { + return range_type(begin(), end(), grainsize); + } + + // Get const range for iterating with parallel algorithms + const_range_type range( size_t grainsize = 1 ) const { + return const_range_type(begin(), end(), grainsize); + } + + reference front() { + return internal_subscript(0); + } + + const_reference front() const { + return internal_subscript(0); + } + + reference back() { + return internal_subscript(size() - 1); + } + + const_reference back() const { + return internal_subscript(size() - 1); + } + + // Iterators + iterator begin() { return iterator(*this, 0); } + const_iterator begin() const { return const_iterator(*this, 0); } + const_iterator cbegin() const { return const_iterator(*this, 0); } + + iterator end() { return iterator(*this, size()); } + const_iterator end() const { return const_iterator(*this, size()); } + const_iterator cend() const { return const_iterator(*this, size()); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + allocator_type get_allocator() const { + return base_type::get_allocator(); + } + + // Storage + bool empty() const noexcept { + return 0 == size(); + } + + size_type size() const noexcept { + return std::min(this->my_size.load(std::memory_order_acquire), capacity()); + } + + size_type max_size() const noexcept { + return allocator_traits_type::max_size(base_type::get_allocator()); + } + + size_type capacity() const noexcept { + return base_type::capacity(); + } + + void reserve( 
size_type n ) { + if (n == 0) return; + + if (n > max_size()) { + tbb::detail::throw_exception(exception_id::reservation_length_error); + } + + this->assign_first_block_if_necessary(this->segment_index_of(n - 1) + 1); + base_type::reserve(n); + } + + void resize( size_type n ) { + internal_resize(n); + } + + void resize( size_type n, const value_type& val ) { + internal_resize(n, val); + } + + void shrink_to_fit() { + internal_compact(); + } + + void swap(concurrent_vector& other) noexcept(is_noexcept_swap) { + base_type::swap(other); + } + + void clear() { + destroy_elements(); + } + +private: + using segment_type = typename base_type::segment_type; + using segment_table_type = typename base_type::segment_table_type; + using segment_table_allocator_traits = typename base_type::segment_table_allocator_traits; + using segment_index_type = typename base_type::segment_index_type; + + using segment_element_type = typename base_type::value_type; + using segment_element_allocator_type = typename allocator_traits_type::template rebind_alloc<segment_element_type>; + using segment_element_allocator_traits = tbb::detail::allocator_traits<segment_element_allocator_type>; + + segment_table_type allocate_long_table( const typename base_type::atomic_segment* embedded_table, size_type start_index ) { + __TBB_ASSERT(start_index <= this->embedded_table_size, "Start index out of embedded table"); + + // If other threads are trying to set pointers in the short segment, wait for them to finish their + // assignments before we copy the short segment to the long segment. Note: grow_to_at_least depends on it + for (segment_index_type i = 0; this->segment_base(i) < start_index; ++i) { + spin_wait_while_eq(embedded_table[i], segment_type(nullptr)); + } + + // It is possible that the table was extend by a thread allocating first_block, need to check this. 
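allocate_long_table therefore has a double-check shape: wait for in-flight writers to the embedded slots, re-check that the published table is still the embedded one, and only then build the replacement. A stripped-down, generic sketch of that shape with plain std::atomic (hypothetical names, not the real segment_table types):

    // Generic double-check sketch: only build a replacement table if the
    // currently published table is still the embedded one.
    #include <atomic>
    #include <cstddef>

    constexpr std::size_t embedded_size = 3;
    constexpr std::size_t long_size = 8;

    std::atomic<void*> embedded_table[embedded_size];            // zero-initialized static storage
    std::atomic<std::atomic<void*>*> published{embedded_table};

    std::atomic<void*>* try_build_long_table() {
        if (published.load(std::memory_order_acquire) != embedded_table) {
            return nullptr;                                       // another thread already extended the table
        }
        auto* long_table = new std::atomic<void*>[long_size];
        for (std::size_t i = 0; i < embedded_size; ++i)
            long_table[i].store(embedded_table[i].load(std::memory_order_relaxed),
                                std::memory_order_relaxed);
        for (std::size_t i = embedded_size; i < long_size; ++i)
            long_table[i].store(nullptr, std::memory_order_relaxed);
        return long_table;                                        // the caller still has to publish it
    }

    int main() {
        delete[] try_build_long_table();
        return 0;
    }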
+ if (this->get_table() != embedded_table) { + return nullptr; + } + + // Allocate long segment table and fill with null pointers + segment_table_type new_segment_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), this->pointers_per_long_table); + // Copy segment pointers from the embedded table + for (size_type segment_index = 0; segment_index < this->pointers_per_embedded_table; ++segment_index) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], + embedded_table[segment_index].load(std::memory_order_relaxed)); + } + for (size_type segment_index = this->pointers_per_embedded_table; segment_index < this->pointers_per_long_table; ++segment_index) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], nullptr); + } + + return new_segment_table; + } + + // create_segment function is required by the segment_table base class + segment_type create_segment( segment_table_type table, segment_index_type seg_index, size_type index ) { + size_type first_block = this->my_first_block.load(std::memory_order_relaxed); + // First block allocation + if (seg_index < first_block) { + // If 0 segment is already allocated, then it remains to wait until the segments are filled to requested + if (table[0].load(std::memory_order_acquire) != nullptr) { + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + return nullptr; + } + + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + segment_type new_segment = nullptr; + size_type first_block_size = this->segment_size(first_block); + try_call( [&] { + new_segment = segment_element_allocator_traits::allocate(segment_allocator, first_block_size); + } ).on_exception( [&] { + segment_type disabled_segment = nullptr; + if (table[0].compare_exchange_strong(disabled_segment, this->segment_allocation_failure_tag)) { + size_type end_segment = table == this->my_embedded_table ? this->pointers_per_embedded_table : first_block; + for (size_type i = 1; i < end_segment; ++i) { + table[i].store(this->segment_allocation_failure_tag, std::memory_order_release); + } + } + }); + + segment_type disabled_segment = nullptr; + if (table[0].compare_exchange_strong(disabled_segment, new_segment)) { + this->extend_table_if_necessary(table, 0, first_block_size); + for (size_type i = 1; i < first_block; ++i) { + table[i].store(new_segment, std::memory_order_release); + } + + // Other threads can wait on a snapshot of an embedded table, need to fill it. 
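The surrounding block is a classic allocate-then-publish race: every contender allocates a candidate first block, exactly one compare_exchange_strong on table[0] succeeds, the losers release their allocation, and the exception path publishes a sentinel tag so waiters do not spin on a pointer that will never arrive. A generic sketch of the same shape, reduced to a single std::atomic slot with hypothetical names:

    // Generic allocate-then-publish sketch mirroring the CAS race above.
    #include <atomic>
    #include <cstddef>
    #include <thread>
    #include <vector>

    std::atomic<int*> slot{nullptr};

    int* get_or_create(std::size_t n) {
        int* mine = new int[n]();                       // every contender allocates first
        int* expected = nullptr;
        if (slot.compare_exchange_strong(expected, mine)) {
            return mine;                                // this thread won and published its block
        }
        delete[] mine;                                  // lost the race: discard and use the winner's block
        return expected;                                // the failed CAS loaded the published pointer
    }

    int main() {
        std::vector<std::thread> threads;
        for (int i = 0; i < 4; ++i) {
            threads.emplace_back([] { int* block = get_or_create(16); (void)block; });
        }
        for (auto& t : threads) t.join();
        delete[] slot.load();
        return 0;
    }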
+ for (size_type i = 1; i < first_block && i < this->pointers_per_embedded_table; ++i) { + this->my_embedded_table[i].store(new_segment, std::memory_order_release); + } + } else if (new_segment != this->segment_allocation_failure_tag) { + // Deallocate the memory + segment_element_allocator_traits::deallocate(segment_allocator, new_segment, first_block_size); + // 0 segment is already allocated, then it remains to wait until the segments are filled to requested + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + } + } else { + size_type offset = this->segment_base(seg_index); + if (index == offset) { + __TBB_ASSERT(table[seg_index].load(std::memory_order_relaxed) == nullptr, "Only this thread can enable this segment"); + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + segment_type new_segment = this->segment_allocation_failure_tag; + try_call( [&] { + new_segment = segment_element_allocator_traits::allocate(segment_allocator,this->segment_size(seg_index)); + // Shift base address to simplify access by index + new_segment -= this->segment_base(seg_index); + } ).on_completion( [&] { + table[seg_index].store(new_segment, std::memory_order_release); + }); + } else { + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + } + } + return nullptr; + } + + // Returns the number of elements in the segment to be destroy + size_type number_of_elements_in_segment( segment_index_type seg_index ) { + size_type curr_vector_size = this->my_size.load(std::memory_order_relaxed); + size_type curr_segment_base = this->segment_base(seg_index); + + if (seg_index == 0) { + return std::min(curr_vector_size, this->segment_size(seg_index)); + } else { + // Perhaps the segment is allocated, but there are no elements in it. + if (curr_vector_size < curr_segment_base) { + return 0; + } + return curr_segment_base * 2 > curr_vector_size ? curr_vector_size - curr_segment_base : curr_segment_base; + } + } + + void deallocate_segment( segment_type address, segment_index_type seg_index ) { + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + size_type first_block = this->my_first_block.load(std::memory_order_relaxed); + if (seg_index >= first_block) { + segment_element_allocator_traits::deallocate(segment_allocator, address, this->segment_size(seg_index)); + } + else if (seg_index == 0) { + size_type elements_to_deallocate = first_block > 0 ? 
this->segment_size(first_block) : this->segment_size(0); + segment_element_allocator_traits::deallocate(segment_allocator, address, elements_to_deallocate); + } + } + + // destroy_segment function is required by the segment_table base class + void destroy_segment( segment_type address, segment_index_type seg_index ) { + size_type elements_to_destroy = number_of_elements_in_segment(seg_index); + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + + for (size_type i = 0; i < elements_to_destroy; ++i) { + segment_element_allocator_traits::destroy(segment_allocator, address + i); + } + + deallocate_segment(address, seg_index); + } + + // copy_segment function is required by the segment_table base class + void copy_segment( segment_index_type seg_index, segment_type from, segment_type to ) { + size_type i = 0; + try_call( [&] { + for (; i != number_of_elements_in_segment(seg_index); ++i) { + segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, from[i]); + } + } ).on_exception( [&] { + // Zero-initialize items left not constructed after the exception + zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); + + segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); + auto table = this->get_table(); + for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { + auto curr_segment = table[j].load(std::memory_order_relaxed); + if (curr_segment) { + zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); + } + } + this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); + }); + } + + // move_segment function is required by the segment_table base class + void move_segment( segment_index_type seg_index, segment_type from, segment_type to ) { + size_type i = 0; + try_call( [&] { + for (; i != number_of_elements_in_segment(seg_index); ++i) { + segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, std::move(from[i])); + } + } ).on_exception( [&] { + // Zero-initialize items left not constructed after the exception + zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); + + segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); + auto table = this->get_table(); + for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { + auto curr_segment = table[j].load(std::memory_order_relaxed); + if (curr_segment) { + zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); + } + } + this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); + }); + } + + static constexpr bool is_first_element_in_segment( size_type index ) { + // An element is the first in a segment if its index is equal to a power of two + return is_power_of_two_at_least(index, 2); + } + + const_reference internal_subscript( size_type index ) const { + return const_cast<self_type*>(this)->internal_subscript(index); + } + + reference internal_subscript( size_type index ) { + __TBB_ASSERT(index < this->my_size.load(std::memory_order_relaxed), "Invalid subscript index"); + return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); + } + + const_reference internal_subscript_with_exceptions( size_type index ) const { + return const_cast<self_type*>(this)->internal_subscript_with_exceptions(index); + } + + reference 
internal_subscript_with_exceptions( size_type index ) { + if (index >= this->my_size.load(std::memory_order_acquire)) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + segment_table_type table = this->my_segment_table.load(std::memory_order_acquire); + + size_type seg_index = this->segment_index_of(index); + if (base_type::number_of_segments(table) < seg_index) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + if (table[seg_index] <= this->segment_allocation_failure_tag) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); + } + + static void zero_unconstructed_elements( pointer start, size_type count ) { + std::memset(static_cast<void *>(start), 0, count * sizeof(value_type)); + } + + template <typename... Args> + iterator internal_emplace_back( Args&&... args ) { + size_type old_size = this->my_size++; + this->assign_first_block_if_necessary(default_first_block_size); + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(old_size); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + zero_unconstructed_elements(element_address, /*count =*/1); + }); + + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, std::forward<Args>(args)...); + value_guard.dismiss(); + return iterator(*this, old_size, element_address); + } + + template <typename... Args> + void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, const Args&... args ) { + static_assert(sizeof...(Args) < 2, "Too many parameters"); + for (size_type idx = start_idx; idx < end_idx; ++idx) { + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard( [&] { + segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); + size_type segment_size = this->segment_size(last_allocated_segment); + end_idx = end_idx < segment_size ? end_idx : segment_size; + for (size_type i = idx; i < end_idx; ++i) { + zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); + } + }); + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, args...); + value_guard.dismiss(); + } + } + + template <typename ForwardIterator> + void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, ForwardIterator first, ForwardIterator ) { + for (size_type idx = start_idx; idx < end_idx; ++idx) { + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); + try_call( [&] { + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, *first++); + } ).on_exception( [&] { + segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); + size_type segment_size = this->segment_size(last_allocated_segment); + end_idx = end_idx < segment_size ? end_idx : segment_size; + for (size_type i = idx; i < end_idx; ++i) { + zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); + } + }); + } + } + + template <typename... Args> + iterator internal_grow( size_type start_idx, size_type end_idx, const Args&... 
args ) { + this->assign_first_block_if_necessary(this->segment_index_of(end_idx - 1) + 1); + size_type seg_index = this->segment_index_of(end_idx - 1); + segment_table_type table = this->get_table(); + this->extend_table_if_necessary(table, start_idx, end_idx); + + if (seg_index > this->my_first_block.load(std::memory_order_relaxed)) { + // So that other threads be able to work with the last segment of grow_by, allocate it immediately. + // If the last segment is not less than the first block + if (table[seg_index].load(std::memory_order_relaxed) == nullptr) { + size_type first_element = this->segment_base(seg_index); + if (first_element >= start_idx && first_element < end_idx) { + segment_type segment = table[seg_index].load(std::memory_order_relaxed); + base_type::enable_segment(segment, table, seg_index, first_element); + } + } + } + + internal_loop_construct(table, start_idx, end_idx, args...); + + return iterator(*this, start_idx, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(start_idx)); + } + + + template <typename... Args> + iterator internal_grow_by_delta( size_type delta, const Args&... args ) { + if (delta == size_type(0)) { + return end(); + } + size_type start_idx = this->my_size.fetch_add(delta); + size_type end_idx = start_idx + delta; + + return internal_grow(start_idx, end_idx, args...); + } + + template <typename... Args> + iterator internal_grow_to_at_least( size_type new_size, const Args&... args ) { + size_type old_size = this->my_size.load(std::memory_order_relaxed); + if (new_size == size_type(0)) return iterator(*this, 0); + while (old_size < new_size && !this->my_size.compare_exchange_weak(old_size, new_size)) + {} + + int delta = static_cast<int>(new_size) - static_cast<int>(old_size); + if (delta > 0) { + return internal_grow(old_size, new_size, args...); + } + + size_type end_segment = this->segment_index_of(new_size - 1); + + // Check/wait for segments allocation completes + if (end_segment >= this->pointers_per_embedded_table && + this->get_table() == this->my_embedded_table) + { + spin_wait_while_eq(this->my_segment_table, this->my_embedded_table); + } + + for (segment_index_type seg_idx = 0; seg_idx <= end_segment; ++seg_idx) { + if (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { + atomic_backoff backoff(true); + while (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { + backoff.pause(); + } + } + } + + #if TBB_USE_DEBUG + size_type cap = capacity(); + __TBB_ASSERT( cap >= new_size, NULL); + #endif + return iterator(*this, size()); + } + + template <typename... Args> + void internal_resize( size_type n, const Args&... 
args ) { + if (n == 0) { + clear(); + return; + } + + size_type old_size = this->my_size.load(std::memory_order_acquire); + if (n > old_size) { + reserve(n); + grow_to_at_least(n, args...); + } else { + if (old_size == n) { + return; + } + size_type last_segment = this->segment_index_of(old_size - 1); + // Delete segments + for (size_type seg_idx = this->segment_index_of(n - 1) + 1; seg_idx <= last_segment; ++seg_idx) { + this->delete_segment(seg_idx); + } + + // If n > segment_size(n) => we need to destroy all of the items in the first segment + // Otherwise, we need to destroy only items with the index < n + size_type n_segment = this->segment_index_of(n - 1); + size_type last_index_to_destroy = std::min(this->segment_base(n_segment) + this->segment_size(n_segment), old_size); + // Destroy elements in curr segment + for (size_type idx = n; idx < last_index_to_destroy; ++idx) { + segment_table_allocator_traits::destroy(base_type::get_allocator(), &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(idx)); + } + this->my_size.store(n, std::memory_order_release); + } + } + + void destroy_elements() { + allocator_type alloc(base_type::get_allocator()); + for (size_type i = 0; i < this->my_size.load(std::memory_order_relaxed); ++i) { + allocator_traits_type::destroy(alloc, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(i)); + } + this->my_size.store(0, std::memory_order_relaxed); + } + + static bool incompact_predicate( size_type size ) { + // memory page size + const size_type page_size = 4096; + return size < page_size || ((size - 1) % page_size < page_size / 2 && size < page_size * 128); + } + + void internal_compact() { + const size_type curr_size = this->my_size.load(std::memory_order_relaxed); + segment_table_type table = this->get_table(); + const segment_index_type k_end = this->find_last_allocated_segment(table); // allocated segments + const segment_index_type k_stop = curr_size ? this->segment_index_of(curr_size - 1) + 1 : 0; // number of segments to store existing items: 0=>0; 1,2=>1; 3,4=>2; [5-8]=>3;.. 
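incompact_predicate above reads as a heuristic: a segment is still worth folding into the merged first block while its byte size is under a page, or while its last page would be at most half-occupied and the whole segment stays under 128 pages. Copying the formula into a standalone program makes it easy to see which sizes keep the merge loop going:

    // Standalone copy of the incompact_predicate heuristic, for experimentation only.
    #include <cstddef>
    #include <iostream>

    bool incompact_predicate(std::size_t size_in_bytes) {          // same formula as in the header
        const std::size_t page_size = 4096;
        return size_in_bytes < page_size ||
               ((size_in_bytes - 1) % page_size < page_size / 2 && size_in_bytes < page_size * 128);
    }

    int main() {
        const std::size_t sizes[] = {512, 4096, 6144, 8192, 100000, 1u << 20};
        for (std::size_t s : sizes) {
            std::cout << s << " bytes -> "
                      << (incompact_predicate(s) ? "keep merging" : "stop") << '\n';
        }
        return 0;
    }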
+ const segment_index_type first_block = this->my_first_block; // number of merged segments, getting values from atomics + + segment_index_type k = first_block; + if (k_stop < first_block) { + k = k_stop; + } + else { + while (k < k_stop && incompact_predicate(this->segment_size(k) * sizeof(value_type))) k++; + } + + if (k_stop == k_end && k == first_block) { + return; + } + + // First segment optimization + if (k != first_block && k) { + size_type max_block = std::max(first_block, k); + + auto buffer_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), max_block); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &buffer_table[seg_idx], + table[seg_idx].load(std::memory_order_relaxed)); + table[seg_idx].store(nullptr, std::memory_order_relaxed); + } + + this->my_first_block.store(k, std::memory_order_relaxed); + size_type index = 0; + try_call( [&] { + for (; index < std::min(this->segment_size(max_block), curr_size); ++index) { + auto element_address = &static_cast<base_type*>(this)->operator[](index); + segment_index_type seg_idx = this->segment_index_of(index); + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, + std::move_if_noexcept(buffer_table[seg_idx].load(std::memory_order_relaxed)[index])); + } + } ).on_exception( [&] { + segment_element_allocator_type allocator(base_type::get_allocator()); + for (size_type i = 0; i < index; ++i) { + auto element_adress = &this->operator[](i); + segment_element_allocator_traits::destroy(allocator, element_adress); + } + segment_element_allocator_traits::deallocate(allocator, + table[0].load(std::memory_order_relaxed), this->segment_size(max_block)); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + table[seg_idx].store(buffer_table[seg_idx].load(std::memory_order_relaxed), + std::memory_order_relaxed); + buffer_table[seg_idx].store(nullptr, std::memory_order_relaxed); + } + segment_table_allocator_traits::deallocate(base_type::get_allocator(), + buffer_table, max_block); + this->my_first_block.store(first_block, std::memory_order_relaxed); + }); + + // Need to correct deallocate old segments + // Method destroy_segment respect active first_block, therefore, + // in order for the segment deletion to work correctly, set the first_block size that was earlier, + // destroy the unnecessary segments. 
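The compaction logic above leans on the power-of-two segment layout spelled out by the size comment just before it (1-2 elements need one segment, 3-4 two, 5-8 three, and so on). A rough standalone sketch of that arithmetic; the helper names mirror, but are not, the actual segment_table base-class API:

#include <cstddef>
#include <cassert>

// floor(log2(index | 1)): index 0 and 1 land in segment 0,
// segment k > 0 then holds the next 2^k indices.
static std::size_t segment_index_of(std::size_t index) {
    std::size_t k = 0;
    for (std::size_t v = index | 1; v > 1; v >>= 1) ++k;
    return k;
}
static std::size_t segment_base(std::size_t k) { return (std::size_t(1) << k) & ~std::size_t(1); }
static std::size_t segment_size(std::size_t k) { return k == 0 ? 2 : std::size_t(1) << k; }

int main() {
    assert(segment_index_of(0) == 0 && segment_index_of(1) == 0);
    assert(segment_index_of(2) == 1 && segment_index_of(4) == 2 && segment_index_of(8) == 3);
    assert(segment_base(2) == 4 && segment_size(2) == 4);
    return 0;
}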
+ this->my_first_block.store(first_block, std::memory_order_relaxed); + for (size_type seg_idx = max_block; seg_idx > 0 ; --seg_idx) { + auto curr_segment = buffer_table[seg_idx - 1].load(std::memory_order_relaxed); + if (curr_segment != nullptr) { + destroy_segment(buffer_table[seg_idx - 1].load(std::memory_order_relaxed) + this->segment_base(seg_idx - 1), + seg_idx - 1); + } + } + + this->my_first_block.store(k, std::memory_order_relaxed); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + segment_table_allocator_traits::destroy(base_type::get_allocator(), &buffer_table[seg_idx]); + } + + segment_table_allocator_traits::deallocate(base_type::get_allocator(), buffer_table, max_block); + } + // free unnecessary segments allocated by reserve() call + if (k_stop < k_end) { + for (size_type seg_idx = k_end; seg_idx != k_stop; --seg_idx) { + if (table[seg_idx - 1].load(std::memory_order_relaxed) != nullptr) { + this->delete_segment(seg_idx - 1); + } + } + if (!k) this->my_first_block.store(0, std::memory_order_relaxed);; + } + } + + // Lever for adjusting the size of first_block at the very first insertion. + // TODO: consider >1 value, check performance + static constexpr size_type default_first_block_size = 1; + + template <typename Vector, typename Value> + friend class vector_iterator; +}; // class concurrent_vector + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_vector( It, It, Alloc = Alloc() ) +-> concurrent_vector<iterator_value_t<It>, Alloc>; +#endif + +template <typename T, typename Allocator> +void swap(concurrent_vector<T, Allocator> &lhs, + concurrent_vector<T, Allocator> &rhs) +{ + lhs.swap(rhs); +} + +template <typename T, typename Allocator> +bool operator==(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename T, typename Allocator> +bool operator!=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(lhs == rhs); +} +#endif // !__TBB_CPP20_COMPARISONS_PRESENT + +#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT +template <typename T, typename Allocator> +tbb::detail::synthesized_three_way_result<typename concurrent_vector<T, Allocator>::value_type> +operator<=>(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), + rhs.begin(), rhs.end(), + tbb::detail::synthesized_three_way_comparator{}); +} + +#else + +template <typename T, typename Allocator> +bool operator<(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +template <typename T, typename Allocator> +bool operator<=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(rhs < lhs); +} + +template <typename T, typename Allocator> +bool operator>(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return rhs < lhs; +} + +template <typename T, typename 
Allocator> +bool operator>=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(lhs < rhs); +} +#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::concurrent_vector; +} // namespace v1 + +} // namespace tbb + +#endif // __TBB_concurrent_vector_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h index 40ba64e43d..83598bbd0d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h @@ -1,173 +1,173 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - - -#ifndef __TBB_detail__aggregator_H -#define __TBB_detail__aggregator_H - -#include "_assert.h" -#include "_utils.h" -#include <atomic> -#if !__TBBMALLOC_BUILD // TODO: check this macro with TBB Malloc -#include "../profiling.h" -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -// Base class for aggregated operation -template <typename Derived> -class aggregated_operation { -public: - // Zero value means "wait" status, all other values are "user" specified values and - // are defined into the scope of a class which uses "status" - std::atomic<uintptr_t> status; - - std::atomic<Derived*> next; - aggregated_operation() : status{}, next(nullptr) {} -}; // class aggregated_operation - -// Aggregator base class -/* An aggregator for collecting operations coming from multiple sources and executing - them serially on a single thread. OperationType must be derived from - aggregated_operation. The parameter HandlerType is a functor that will be passed the - list of operations and is expected to handle each operation appropriately, setting the - status of each operation to non-zero. */ -template <typename OperationType> -class aggregator_generic { -public: - aggregator_generic() : pending_operations(nullptr), handler_busy(false) {} - - // Execute an operation - /* Places an operation into the waitlist (pending_operations), and either handles the list, - or waits for the operation to complete, or returns. - The long_life_time parameter specifies the life time of the given operation object. - Operations with long_life_time == true may be accessed after execution. - A "short" life time operation (long_life_time == false) can be destroyed - during execution, and so any access to it after it was put into the waitlist, - including status check, is invalid. As a consequence, waiting for completion - of such operation causes undefined behavior. */ - template <typename HandlerType> - void execute( OperationType* op, HandlerType& handle_operations, bool long_life_time = true ) { - // op->status should be read before inserting the operation into the - // aggregator waitlist since it can become invalid after executing a - // handler (if the operation has 'short' life time.) 
- const uintptr_t status = op->status.load(std::memory_order_relaxed); - - // ITT note: &(op->status) tag is used to cover accesses to this op node. This - // thread has created the operation, and now releases it so that the handler - // thread may handle the associated operation w/o triggering a race condition; - // thus this tag will be acquired just before the operation is handled in the - // handle_operations functor. - call_itt_notify(releasing, &(op->status)); - // insert the operation in the queue. - OperationType* res = pending_operations.load(std::memory_order_relaxed); - do { - op->next.store(res, std::memory_order_relaxed); - } while (!pending_operations.compare_exchange_strong(res, op)); - if (!res) { // first in the list; handle the operations - // ITT note: &pending_operations tag covers access to the handler_busy flag, - // which this waiting handler thread will try to set before entering - // handle_operations. - call_itt_notify(acquired, &pending_operations); - start_handle_operations(handle_operations); - // The operation with 'short' life time can already be destroyed - if (long_life_time) - __TBB_ASSERT(op->status.load(std::memory_order_relaxed), NULL); - } - // Not first; wait for op to be ready - else if (!status) { // operation is blocking here. - __TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing"); - call_itt_notify(prepare, &(op->status)); - spin_wait_while_eq(op->status, uintptr_t(0)); - } - } - -private: - // Trigger the handling of operations when the handler is free - template <typename HandlerType> - void start_handle_operations( HandlerType& handle_operations ) { - OperationType* op_list; - - // ITT note: &handler_busy tag covers access to pending_operations as it is passed - // between active and waiting handlers. Below, the waiting handler waits until - // the active handler releases, and the waiting handler acquires &handler_busy as - // it becomes the active_handler. The release point is at the end of this - // function, when all operations in pending_operations have been handled by the - // owner of this aggregator. - call_itt_notify(prepare, &handler_busy); - // get the handler_busy: - // only one thread can possibly spin here at a time - spin_wait_until_eq(handler_busy, uintptr_t(0)); - call_itt_notify(acquired, &handler_busy); - // acquire fence not necessary here due to causality rule and surrounding atomics - handler_busy.store(1, std::memory_order_relaxed); - - // ITT note: &pending_operations tag covers access to the handler_busy flag - // itself. Capturing the state of the pending_operations signifies that - // handler_busy has been set and a new active handler will now process that list's - // operations. 
- call_itt_notify(releasing, &pending_operations); - // grab pending_operations - op_list = pending_operations.exchange(nullptr); - - // handle all the operations - handle_operations(op_list); - - // release the handler - handler_busy.store(0, std::memory_order_release); - } - - // An atomically updated list (aka mailbox) of pending operations - std::atomic<OperationType*> pending_operations; - // Controls threads access to handle_operations - std::atomic<uintptr_t> handler_busy; -}; // class aggregator_generic - -template <typename HandlerType, typename OperationType> -class aggregator : public aggregator_generic<OperationType> { - HandlerType handle_operations; -public: - aggregator() = default; - - void initialize_handler( HandlerType h ) { handle_operations = h; } - - void execute(OperationType* op) { - aggregator_generic<OperationType>::execute(op, handle_operations); - } -}; // class aggregator - -// the most-compatible friend declaration (vs, gcc, icc) is -// template<class U, class V> friend class aggregating_functor; -template <typename AggregatingClass, typename OperationList> -class aggregating_functor { - AggregatingClass* my_object; -public: - aggregating_functor() = default; - aggregating_functor( AggregatingClass* object ) : my_object(object) { - __TBB_ASSERT(my_object, nullptr); - } - - void operator()( OperationList* op_list ) { my_object->handle_operations(op_list); } -}; // class aggregating_functor - - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__aggregator_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + + +#ifndef __TBB_detail__aggregator_H +#define __TBB_detail__aggregator_H + +#include "_assert.h" +#include "_utils.h" +#include <atomic> +#if !__TBBMALLOC_BUILD // TODO: check this macro with TBB Malloc +#include "../profiling.h" +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +// Base class for aggregated operation +template <typename Derived> +class aggregated_operation { +public: + // Zero value means "wait" status, all other values are "user" specified values and + // are defined into the scope of a class which uses "status" + std::atomic<uintptr_t> status; + + std::atomic<Derived*> next; + aggregated_operation() : status{}, next(nullptr) {} +}; // class aggregated_operation + +// Aggregator base class +/* An aggregator for collecting operations coming from multiple sources and executing + them serially on a single thread. OperationType must be derived from + aggregated_operation. The parameter HandlerType is a functor that will be passed the + list of operations and is expected to handle each operation appropriately, setting the + status of each operation to non-zero. 
*/ +template <typename OperationType> +class aggregator_generic { +public: + aggregator_generic() : pending_operations(nullptr), handler_busy(false) {} + + // Execute an operation + /* Places an operation into the waitlist (pending_operations), and either handles the list, + or waits for the operation to complete, or returns. + The long_life_time parameter specifies the life time of the given operation object. + Operations with long_life_time == true may be accessed after execution. + A "short" life time operation (long_life_time == false) can be destroyed + during execution, and so any access to it after it was put into the waitlist, + including status check, is invalid. As a consequence, waiting for completion + of such operation causes undefined behavior. */ + template <typename HandlerType> + void execute( OperationType* op, HandlerType& handle_operations, bool long_life_time = true ) { + // op->status should be read before inserting the operation into the + // aggregator waitlist since it can become invalid after executing a + // handler (if the operation has 'short' life time.) + const uintptr_t status = op->status.load(std::memory_order_relaxed); + + // ITT note: &(op->status) tag is used to cover accesses to this op node. This + // thread has created the operation, and now releases it so that the handler + // thread may handle the associated operation w/o triggering a race condition; + // thus this tag will be acquired just before the operation is handled in the + // handle_operations functor. + call_itt_notify(releasing, &(op->status)); + // insert the operation in the queue. + OperationType* res = pending_operations.load(std::memory_order_relaxed); + do { + op->next.store(res, std::memory_order_relaxed); + } while (!pending_operations.compare_exchange_strong(res, op)); + if (!res) { // first in the list; handle the operations + // ITT note: &pending_operations tag covers access to the handler_busy flag, + // which this waiting handler thread will try to set before entering + // handle_operations. + call_itt_notify(acquired, &pending_operations); + start_handle_operations(handle_operations); + // The operation with 'short' life time can already be destroyed + if (long_life_time) + __TBB_ASSERT(op->status.load(std::memory_order_relaxed), NULL); + } + // Not first; wait for op to be ready + else if (!status) { // operation is blocking here. + __TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing"); + call_itt_notify(prepare, &(op->status)); + spin_wait_while_eq(op->status, uintptr_t(0)); + } + } + +private: + // Trigger the handling of operations when the handler is free + template <typename HandlerType> + void start_handle_operations( HandlerType& handle_operations ) { + OperationType* op_list; + + // ITT note: &handler_busy tag covers access to pending_operations as it is passed + // between active and waiting handlers. Below, the waiting handler waits until + // the active handler releases, and the waiting handler acquires &handler_busy as + // it becomes the active_handler. The release point is at the end of this + // function, when all operations in pending_operations have been handled by the + // owner of this aggregator. 
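A compact sketch of how the aggregated_operation / aggregator machinery in this header is meant to be used; the operation type and handler below are hypothetical, not TBB code. The handler walks the captured waitlist and must set each status to a non-zero value, which is what releases a thread spinning inside execute():

#include "oneapi/tbb/detail/_aggregator.h"
#include <atomic>

struct sum_operation : tbb::detail::d1::aggregated_operation<sum_operation> {
    int value = 0;
    long* accumulator = nullptr;
};

struct sum_handler {
    void operator()(sum_operation* list) const {
        while (list) {
            // Read 'next' before publishing completion: once status becomes
            // non-zero the waiting thread may destroy the operation object.
            sum_operation* next = list->next.load(std::memory_order_relaxed);
            *list->accumulator += list->value;
            list->status.store(1, std::memory_order_release);
            list = next;
        }
    }
};

void add_value(tbb::detail::d1::aggregator<sum_handler, sum_operation>& agg,
               long& total, int v) {
    sum_operation op;          // short-lived is fine: execute() waits for it
    op.value = v;
    op.accumulator = &total;
    agg.execute(&op);          // returns only after some thread has handled op
}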
+ call_itt_notify(prepare, &handler_busy); + // get the handler_busy: + // only one thread can possibly spin here at a time + spin_wait_until_eq(handler_busy, uintptr_t(0)); + call_itt_notify(acquired, &handler_busy); + // acquire fence not necessary here due to causality rule and surrounding atomics + handler_busy.store(1, std::memory_order_relaxed); + + // ITT note: &pending_operations tag covers access to the handler_busy flag + // itself. Capturing the state of the pending_operations signifies that + // handler_busy has been set and a new active handler will now process that list's + // operations. + call_itt_notify(releasing, &pending_operations); + // grab pending_operations + op_list = pending_operations.exchange(nullptr); + + // handle all the operations + handle_operations(op_list); + + // release the handler + handler_busy.store(0, std::memory_order_release); + } + + // An atomically updated list (aka mailbox) of pending operations + std::atomic<OperationType*> pending_operations; + // Controls threads access to handle_operations + std::atomic<uintptr_t> handler_busy; +}; // class aggregator_generic + +template <typename HandlerType, typename OperationType> +class aggregator : public aggregator_generic<OperationType> { + HandlerType handle_operations; +public: + aggregator() = default; + + void initialize_handler( HandlerType h ) { handle_operations = h; } + + void execute(OperationType* op) { + aggregator_generic<OperationType>::execute(op, handle_operations); + } +}; // class aggregator + +// the most-compatible friend declaration (vs, gcc, icc) is +// template<class U, class V> friend class aggregating_functor; +template <typename AggregatingClass, typename OperationList> +class aggregating_functor { + AggregatingClass* my_object; +public: + aggregating_functor() = default; + aggregating_functor( AggregatingClass* object ) : my_object(object) { + __TBB_ASSERT(my_object, nullptr); + } + + void operator()( OperationList* op_list ) { my_object->handle_operations(op_list); } +}; // class aggregating_functor + + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__aggregator_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h index 13857c47cc..6889983b74 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h @@ -1,46 +1,46 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -#ifndef __TBB_aligned_space_H -#define __TBB_aligned_space_H - -#include <cstddef> - -#include "_template_helpers.h" - -namespace tbb { -namespace detail { -inline namespace d0 { - -//! Block of space aligned sufficiently to construct an array T with N elements. -/** The elements are not constructed or destroyed by this class. 
- @ingroup memory_allocation */ -template<typename T, std::size_t N = 1> -class aligned_space { - alignas(alignof(T)) std::uint8_t aligned_array[N * sizeof(T)]; - -public: - //! Pointer to beginning of array - T* begin() const { return punned_cast<T*>(&aligned_array); } - - //! Pointer to one past last element in array. - T* end() const { return begin() + N; } -}; - -} // namespace d0 -} // namespace detail -} // namespace tbb - -#endif /* __TBB_aligned_space_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef __TBB_aligned_space_H +#define __TBB_aligned_space_H + +#include <cstddef> + +#include "_template_helpers.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Block of space aligned sufficiently to construct an array T with N elements. +/** The elements are not constructed or destroyed by this class. + @ingroup memory_allocation */ +template<typename T, std::size_t N = 1> +class aligned_space { + alignas(alignof(T)) std::uint8_t aligned_array[N * sizeof(T)]; + +public: + //! Pointer to beginning of array + T* begin() const { return punned_cast<T*>(&aligned_array); } + + //! Pointer to one past last element in array. + T* end() const { return begin() + N; } +}; + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_aligned_space_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h index 8c60e25e7e..c3485d1424 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h @@ -1,107 +1,107 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
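aligned_space is just raw, suitably aligned storage: it never constructs or destroys the T objects, so the caller does both explicitly. A small illustrative sketch (the widget type is hypothetical):

#include "oneapi/tbb/detail/_aligned_space.h"
#include <new>

struct widget {
    int id;
    explicit widget(int i) : id(i) {}
    ~widget() {}
};

void demo() {
    // Uninitialized, correctly aligned storage for 4 widgets.
    tbb::detail::d0::aligned_space<widget, 4> storage;
    int i = 0;
    for (widget* p = storage.begin(); p != storage.end(); ++p)
        new (p) widget(i++);   // construct in place
    for (widget* p = storage.begin(); p != storage.end(); ++p)
        p->~widget();          // destroy in place before the storage goes away
}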
-*/ - -#ifndef __TBB_detail__allocator_traits_H -#define __TBB_detail__allocator_traits_H - -#include "_config.h" -#include "_template_helpers.h" -#include <memory> -#include <type_traits> - -namespace tbb { -namespace detail { -inline namespace d0 { - -#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT -// Struct is_always_equal_detector provides the member type "type" which is -// Allocator::is_always_equal if it is present, std::false_type otherwise -template <typename Allocator, typename = void> -struct is_always_equal_detector { - using type = std::false_type; -}; - -template <typename Allocator> -struct is_always_equal_detector<Allocator, tbb::detail::void_t<typename Allocator::is_always_equal>> -{ - using type = typename Allocator::is_always_equal; -}; -#endif // !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT - -template <typename Allocator> -class allocator_traits : public std::allocator_traits<Allocator> -{ - using base_type = std::allocator_traits<Allocator>; -public: -#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT - using is_always_equal = typename is_always_equal_detector<Allocator>::type; -#endif - - template <typename T> - using rebind_traits = typename tbb::detail::allocator_traits<typename base_type::template rebind_alloc<T>>; -}; // struct allocator_traits - -template <typename Allocator> -void copy_assign_allocators_impl( Allocator& lhs, const Allocator& rhs, /*pocca = */std::true_type ) { - lhs = rhs; -} - -template <typename Allocator> -void copy_assign_allocators_impl( Allocator&, const Allocator&, /*pocca = */ std::false_type ) {} - -// Copy assigns allocators only if propagate_on_container_copy_assignment is true -template <typename Allocator> -void copy_assign_allocators( Allocator& lhs, const Allocator& rhs ) { - using pocca_type = typename allocator_traits<Allocator>::propagate_on_container_copy_assignment; - copy_assign_allocators_impl(lhs, rhs, pocca_type()); -} - -template <typename Allocator> -void move_assign_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocma = */ std::true_type ) { - lhs = std::move(rhs); -} - -template <typename Allocator> -void move_assign_allocators_impl( Allocator&, Allocator&, /*pocma = */ std::false_type ) {} - -// Move assigns allocators only if propagate_on_container_move_assignment is true -template <typename Allocator> -void move_assign_allocators( Allocator& lhs, Allocator& rhs ) { - using pocma_type = typename allocator_traits<Allocator>::propagate_on_container_move_assignment; - move_assign_allocators_impl(lhs, rhs, pocma_type()); -} - -template <typename Allocator> -void swap_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocs = */ std::true_type ) { - using std::swap; - swap(lhs, rhs); -} - -template <typename Allocator> -void swap_allocators_impl( Allocator&, Allocator&, /*pocs = */ std::false_type ) {} - -// Swaps allocators only if propagate_on_container_swap is true -template <typename Allocator> -void swap_allocators( Allocator& lhs, Allocator& rhs ) { - using pocs_type = typename allocator_traits<Allocator>::propagate_on_container_swap; - swap_allocators_impl(lhs, rhs, pocs_type()); -} - -} // inline namespace d0 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__allocator_traits_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
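The copy/move/swap helpers in this header dispatch on the allocator's propagate_on_container_* traits: when the trait is std::true_type the allocators are assigned or swapped, otherwise the call is a no-op. A sketch with a hypothetical tagged allocator:

#include "oneapi/tbb/detail/_allocator_traits.h"
#include <type_traits>
#include <cassert>
#include <cstddef>

template <typename T, bool Propagate>
struct tagged_allocator {
    using value_type = T;
    using propagate_on_container_copy_assignment = std::integral_constant<bool, Propagate>;
    int tag;
    T* allocate(std::size_t n) { return static_cast<T*>(::operator new(n * sizeof(T))); }
    void deallocate(T* p, std::size_t) { ::operator delete(p); }
};

void demo() {
    tagged_allocator<int, true> a1{1}, a2{2};
    tbb::detail::copy_assign_allocators(a1, a2);   // POCCA is true_type  -> a1 becomes a copy of a2
    assert(a1.tag == 2);

    tagged_allocator<int, false> b1{1}, b2{2};
    tbb::detail::copy_assign_allocators(b1, b2);   // POCCA is false_type -> b1 is left unchanged
    assert(b1.tag == 1);
}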
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__allocator_traits_H +#define __TBB_detail__allocator_traits_H + +#include "_config.h" +#include "_template_helpers.h" +#include <memory> +#include <type_traits> + +namespace tbb { +namespace detail { +inline namespace d0 { + +#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT +// Struct is_always_equal_detector provides the member type "type" which is +// Allocator::is_always_equal if it is present, std::false_type otherwise +template <typename Allocator, typename = void> +struct is_always_equal_detector { + using type = std::false_type; +}; + +template <typename Allocator> +struct is_always_equal_detector<Allocator, tbb::detail::void_t<typename Allocator::is_always_equal>> +{ + using type = typename Allocator::is_always_equal; +}; +#endif // !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT + +template <typename Allocator> +class allocator_traits : public std::allocator_traits<Allocator> +{ + using base_type = std::allocator_traits<Allocator>; +public: +#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT + using is_always_equal = typename is_always_equal_detector<Allocator>::type; +#endif + + template <typename T> + using rebind_traits = typename tbb::detail::allocator_traits<typename base_type::template rebind_alloc<T>>; +}; // struct allocator_traits + +template <typename Allocator> +void copy_assign_allocators_impl( Allocator& lhs, const Allocator& rhs, /*pocca = */std::true_type ) { + lhs = rhs; +} + +template <typename Allocator> +void copy_assign_allocators_impl( Allocator&, const Allocator&, /*pocca = */ std::false_type ) {} + +// Copy assigns allocators only if propagate_on_container_copy_assignment is true +template <typename Allocator> +void copy_assign_allocators( Allocator& lhs, const Allocator& rhs ) { + using pocca_type = typename allocator_traits<Allocator>::propagate_on_container_copy_assignment; + copy_assign_allocators_impl(lhs, rhs, pocca_type()); +} + +template <typename Allocator> +void move_assign_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocma = */ std::true_type ) { + lhs = std::move(rhs); +} + +template <typename Allocator> +void move_assign_allocators_impl( Allocator&, Allocator&, /*pocma = */ std::false_type ) {} + +// Move assigns allocators only if propagate_on_container_move_assignment is true +template <typename Allocator> +void move_assign_allocators( Allocator& lhs, Allocator& rhs ) { + using pocma_type = typename allocator_traits<Allocator>::propagate_on_container_move_assignment; + move_assign_allocators_impl(lhs, rhs, pocma_type()); +} + +template <typename Allocator> +void swap_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocs = */ std::true_type ) { + using std::swap; + swap(lhs, rhs); +} + +template <typename Allocator> +void swap_allocators_impl( Allocator&, Allocator&, /*pocs = */ std::false_type ) {} + +// Swaps allocators only if propagate_on_container_swap is true +template <typename Allocator> +void swap_allocators( Allocator& lhs, Allocator& rhs ) { + using pocs_type = typename allocator_traits<Allocator>::propagate_on_container_swap; + swap_allocators_impl(lhs, rhs, pocs_type()); +} + +} // inline 
namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__allocator_traits_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h index 4116386a92..d89e1178c8 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h @@ -1,52 +1,52 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__assert_H -#define __TBB_detail__assert_H - -#include "_config.h" - -namespace tbb { -namespace detail { -namespace r1 { -//! Process an assertion failure. -/** Normally called from __TBB_ASSERT macro. - If assertion handler is null, print message for assertion failure and abort. - Otherwise call the assertion handler. */ -void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment); -} // namespace r1 -} // namespace detail -} // namespace tbb - -//! Release version of assertions -#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__FILE__,__LINE__,#predicate,message)) - -#if TBB_USE_ASSERT - //! Assert that predicate is true. - /** If predicate is false, print assertion failure message. - If the comment argument is not NULL, it is printed as part of the failure message. - The comment argument has no other effect. */ - #define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message) - //! "Extended" version - #define __TBB_ASSERT_EX __TBB_ASSERT -#else - //! No-op version of __TBB_ASSERT. - #define __TBB_ASSERT(predicate,comment) ((void)0) - //! "Extended" version is useful to suppress warnings if a variable is only used with an assert - #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) -#endif // TBB_USE_ASSERT - -#endif // __TBB_detail__assert_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__assert_H +#define __TBB_detail__assert_H + +#include "_config.h" + +namespace tbb { +namespace detail { +namespace r1 { +//! Process an assertion failure. +/** Normally called from __TBB_ASSERT macro. + If assertion handler is null, print message for assertion failure and abort. + Otherwise call the assertion handler. 
*/ +void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment); +} // namespace r1 +} // namespace detail +} // namespace tbb + +//! Release version of assertions +#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__FILE__,__LINE__,#predicate,message)) + +#if TBB_USE_ASSERT + //! Assert that predicate is true. + /** If predicate is false, print assertion failure message. + If the comment argument is not NULL, it is printed as part of the failure message. + The comment argument has no other effect. */ + #define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message) + //! "Extended" version + #define __TBB_ASSERT_EX __TBB_ASSERT +#else + //! No-op version of __TBB_ASSERT. + #define __TBB_ASSERT(predicate,comment) ((void)0) + //! "Extended" version is useful to suppress warnings if a variable is only used with an assert + #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) +#endif // TBB_USE_ASSERT + +#endif // __TBB_detail__assert_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h index 6289632601..d8df4d5c3f 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h @@ -1,659 +1,659 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__concurrent_queue_base_H -#define __TBB_detail__concurrent_queue_base_H - -#include "_utils.h" -#include "_exception.h" -#include "_machine.h" -#include "_allocator_traits.h" - -#include "../profiling.h" -#include "../spin_mutex.h" -#include "../cache_aligned_allocator.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace d1 { - -using ticket_type = std::size_t; - -template <typename Page> -inline bool is_valid_page(const Page p) { - return reinterpret_cast<std::uintptr_t>(p) > 1; -} - -template <typename T, typename Allocator> -struct concurrent_queue_rep; - -template <typename Container, typename T, typename Allocator> -class micro_queue_pop_finalizer; - -#if _MSC_VER && !defined(__INTEL_COMPILER) -// unary minus operator applied to unsigned type, result still unsigned -#pragma warning( push ) -#pragma warning( disable: 4146 ) -#endif - -// A queue using simple locking. -// For efficiency, this class has no constructor. -// The caller is expected to zero-initialize it. 
-template <typename T, typename Allocator> -class micro_queue { -private: - using queue_rep_type = concurrent_queue_rep<T, Allocator>; - using self_type = micro_queue<T, Allocator>; -public: - using size_type = std::size_t; - using value_type = T; - using reference = value_type&; - using const_reference = const value_type&; - - using allocator_type = Allocator; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - - static constexpr size_type item_size = sizeof(T); - static constexpr size_type items_per_page = item_size <= 8 ? 32 : - item_size <= 16 ? 16 : - item_size <= 32 ? 8 : - item_size <= 64 ? 4 : - item_size <= 128 ? 2 : 1; - - struct padded_page { - padded_page() {} - ~padded_page() {} - - reference operator[] (std::size_t index) { - __TBB_ASSERT(index < items_per_page, "Index out of range"); - return items[index]; - } - - const_reference operator[] (std::size_t index) const { - __TBB_ASSERT(index < items_per_page, "Index out of range"); - return items[index]; - } - - padded_page* next{ nullptr }; - std::atomic<std::uintptr_t> mask{}; - - union { - value_type items[items_per_page]; - }; - }; // struct padded_page - - using page_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_page>; -protected: - using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; - -public: - using item_constructor_type = void (*)(value_type* location, const void* src); - micro_queue() = default; - micro_queue( const micro_queue& ) = delete; - micro_queue& operator=( const micro_queue& ) = delete; - - size_type prepare_page( ticket_type k, queue_rep_type& base, page_allocator_type page_allocator, - padded_page*& p ) { - __TBB_ASSERT(p == nullptr, "Invalid page argument for prepare_page"); - k &= -queue_rep_type::n_queue; - size_type index = modulo_power_of_two(k / queue_rep_type::n_queue, items_per_page); - if (!index) { - try_call( [&] { - p = page_allocator_traits::allocate(page_allocator, 1); - }).on_exception( [&] { - ++base.n_invalid_entries; - invalidate_page( k ); - }); - page_allocator_traits::construct(page_allocator, p); - } - - if (tail_counter.load(std::memory_order_relaxed) != k) spin_wait_until_my_turn(tail_counter, k, base); - call_itt_notify(acquired, &tail_counter); - - if (p) { - spin_mutex::scoped_lock lock( page_mutex ); - padded_page* q = tail_page.load(std::memory_order_relaxed); - if (is_valid_page(q)) { - q->next = p; - } else { - head_page.store(p, std::memory_order_relaxed); - } - tail_page.store(p, std::memory_order_relaxed);; - } else { - p = tail_page.load(std::memory_order_acquire); // TODO may be relaxed ? - } - return index; - } - - template<typename... Args> - void push( ticket_type k, queue_rep_type& base, Args&&... args ) - { - padded_page* p = nullptr; - page_allocator_type page_allocator(base.get_allocator()); - size_type index = prepare_page(k, base, page_allocator, p); - __TBB_ASSERT(p != nullptr, "Page was not prepared"); - - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard([&] { - ++base.n_invalid_entries; - call_itt_notify(releasing, &tail_counter); - tail_counter.fetch_add(queue_rep_type::n_queue); - }); - - page_allocator_traits::construct(page_allocator, &(*p)[index], std::forward<Args>(args)...); - // If no exception was thrown, mark item as present. 
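The items_per_page policy above simply caps the payload of one page at roughly 256 bytes: 32 slots for items of up to 8 bytes, 16 slots up to 16 bytes, and so on down to a single slot for anything over 128 bytes. A compile-time restatement of that table (sketch only, not the header's code):

#include <cstddef>

template <std::size_t ItemSize>
constexpr std::size_t items_per_page_for() {
    return ItemSize <= 8  ? 32 :
           ItemSize <= 16 ? 16 :
           ItemSize <= 32 ?  8 :
           ItemSize <= 64 ?  4 :
           ItemSize <= 128 ? 2 : 1;
}

static_assert(items_per_page_for<1>()   == 32, "small items: 32 per page");
static_assert(items_per_page_for<64>()  == 4,  "64-byte items: 4 per page");
static_assert(items_per_page_for<256>() == 1,  "large items: 1 per page");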
- p->mask.store(p->mask.load(std::memory_order_relaxed) | uintptr_t(1) << index, std::memory_order_relaxed); - call_itt_notify(releasing, &tail_counter); - - value_guard.dismiss(); - tail_counter.fetch_add(queue_rep_type::n_queue); - } - - void abort_push( ticket_type k, queue_rep_type& base) { - padded_page* p = nullptr; - prepare_page(k, base, base.get_allocator(), p); - ++base.n_invalid_entries; - tail_counter.fetch_add(queue_rep_type::n_queue); - } - - bool pop( void* dst, ticket_type k, queue_rep_type& base ) { - k &= -queue_rep_type::n_queue; - if (head_counter.load(std::memory_order_relaxed) != k) spin_wait_until_eq(head_counter, k); - call_itt_notify(acquired, &head_counter); - if (tail_counter.load(std::memory_order_relaxed) == k) spin_wait_while_eq(tail_counter, k); - call_itt_notify(acquired, &tail_counter); - padded_page *p = head_page.load(std::memory_order_acquire); - __TBB_ASSERT( p, nullptr ); - size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page ); - bool success = false; - { - page_allocator_type page_allocator(base.get_allocator()); - micro_queue_pop_finalizer<self_type, value_type, page_allocator_type> finalizer(*this, page_allocator, - k + queue_rep_type::n_queue, index == items_per_page - 1 ? p : nullptr ); - if (p->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) { - success = true; - assign_and_destroy_item( dst, *p, index ); - } else { - --base.n_invalid_entries; - } - } - return success; - } - - micro_queue& assign( const micro_queue& src, queue_rep_type& base, - item_constructor_type construct_item ) - { - head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); - tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); - - const padded_page* srcp = src.head_page.load(std::memory_order_relaxed); - if( is_valid_page(srcp) ) { - ticket_type g_index = head_counter.load(std::memory_order_relaxed); - size_type n_items = (tail_counter.load(std::memory_order_relaxed) - head_counter.load(std::memory_order_relaxed)) - / queue_rep_type::n_queue; - size_type index = modulo_power_of_two(head_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); - size_type end_in_first_page = (index+n_items < items_per_page) ? 
(index + n_items) : items_per_page; - - try_call( [&] { - head_page.store(make_copy(base, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed); - }).on_exception( [&] { - head_counter.store(0, std::memory_order_relaxed); - tail_counter.store(0, std::memory_order_relaxed); - }); - padded_page* cur_page = head_page.load(std::memory_order_relaxed); - - try_call( [&] { - if (srcp != src.tail_page.load(std::memory_order_relaxed)) { - for (srcp = srcp->next; srcp != src.tail_page.load(std::memory_order_relaxed); srcp=srcp->next ) { - cur_page->next = make_copy( base, srcp, 0, items_per_page, g_index, construct_item ); - cur_page = cur_page->next; - } - - __TBB_ASSERT(srcp == src.tail_page.load(std::memory_order_relaxed), nullptr ); - size_type last_index = modulo_power_of_two(tail_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); - if( last_index==0 ) last_index = items_per_page; - - cur_page->next = make_copy( base, srcp, 0, last_index, g_index, construct_item ); - cur_page = cur_page->next; - } - tail_page.store(cur_page, std::memory_order_relaxed); - }).on_exception( [&] { - padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); - tail_page.store(invalid_page, std::memory_order_relaxed); - }); - } else { - head_page.store(nullptr, std::memory_order_relaxed); - tail_page.store(nullptr, std::memory_order_relaxed); - } - return *this; - } - - padded_page* make_copy( queue_rep_type& base, const padded_page* src_page, size_type begin_in_page, - size_type end_in_page, ticket_type& g_index, item_constructor_type construct_item ) - { - page_allocator_type page_allocator(base.get_allocator()); - padded_page* new_page = page_allocator_traits::allocate(page_allocator, 1); - new_page->next = nullptr; - new_page->mask.store(src_page->mask.load(std::memory_order_relaxed), std::memory_order_relaxed); - for (; begin_in_page!=end_in_page; ++begin_in_page, ++g_index) { - if (new_page->mask.load(std::memory_order_relaxed) & uintptr_t(1) << begin_in_page) { - copy_item(*new_page, begin_in_page, *src_page, begin_in_page, construct_item); - } - } - return new_page; - } - - void invalidate_page( ticket_type k ) { - // Append an invalid page at address 1 so that no more pushes are allowed. 
- padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); - { - spin_mutex::scoped_lock lock( page_mutex ); - tail_counter.store(k + queue_rep_type::n_queue + 1, std::memory_order_relaxed); - padded_page* q = tail_page.load(std::memory_order_relaxed); - if (is_valid_page(q)) { - q->next = invalid_page; - } else { - head_page.store(invalid_page, std::memory_order_relaxed); - } - tail_page.store(invalid_page, std::memory_order_relaxed); - } - } - - padded_page* get_tail_page() { - return tail_page.load(std::memory_order_relaxed); - } - - padded_page* get_head_page() { - return head_page.load(std::memory_order_relaxed); - } - - void set_tail_page( padded_page* pg ) { - tail_page.store(pg, std::memory_order_relaxed); - } - - void clear(queue_rep_type& base) { - padded_page* curr_page = head_page.load(std::memory_order_relaxed); - std::size_t index = head_counter.load(std::memory_order_relaxed); - page_allocator_type page_allocator(base.get_allocator()); - - while (curr_page) { - for (; index != items_per_page - 1; ++index) { - curr_page->operator[](index).~value_type(); - } - padded_page* next_page = curr_page->next; - page_allocator_traits::destroy(page_allocator, curr_page); - page_allocator_traits::deallocate(page_allocator, curr_page, 1); - curr_page = next_page; - } - - padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); - head_page.store(invalid_page, std::memory_order_relaxed); - tail_page.store(invalid_page, std::memory_order_relaxed); - } - -private: - // template <typename U, typename A> - friend class micro_queue_pop_finalizer<self_type, value_type, page_allocator_type>; - - // Class used to ensure exception-safety of method "pop" - class destroyer { - value_type& my_value; - public: - destroyer( reference value ) : my_value(value) {} - destroyer( const destroyer& ) = delete; - destroyer& operator=( const destroyer& ) = delete; - ~destroyer() {my_value.~T();} - }; // class destroyer - - void copy_item( padded_page& dst, size_type dindex, const padded_page& src, size_type sindex, - item_constructor_type construct_item ) - { - auto& src_item = src[sindex]; - construct_item( &dst[dindex], static_cast<const void*>(&src_item) ); - } - - void assign_and_destroy_item( void* dst, padded_page& src, size_type index ) { - auto& from = src[index]; - destroyer d(from); - *static_cast<T*>(dst) = std::move(from); - } - - void spin_wait_until_my_turn( std::atomic<ticket_type>& counter, ticket_type k, queue_rep_type& rb ) const { - for (atomic_backoff b(true);; b.pause()) { - ticket_type c = counter; - if (c == k) return; - else if (c & 1) { - ++rb.n_invalid_entries; - throw_exception( exception_id::bad_last_alloc); - } - } - } - - std::atomic<padded_page*> head_page{}; - std::atomic<ticket_type> head_counter{}; - - std::atomic<padded_page*> tail_page{}; - std::atomic<ticket_type> tail_counter{}; - - spin_mutex page_mutex{}; -}; // class micro_queue - -#if _MSC_VER && !defined(__INTEL_COMPILER) -#pragma warning( pop ) -#endif // warning 4146 is back - -template <typename Container, typename T, typename Allocator> -class micro_queue_pop_finalizer { -public: - using padded_page = typename Container::padded_page; - using allocator_type = Allocator; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - - micro_queue_pop_finalizer( Container& queue, Allocator& alloc, ticket_type k, padded_page* p ) : - my_ticket_type(k), my_queue(queue), my_page(p), allocator(alloc) - {} - - micro_queue_pop_finalizer( const 
micro_queue_pop_finalizer& ) = delete; - micro_queue_pop_finalizer& operator=( const micro_queue_pop_finalizer& ) = delete; - - ~micro_queue_pop_finalizer() { - padded_page* p = my_page; - if( is_valid_page(p) ) { - spin_mutex::scoped_lock lock( my_queue.page_mutex ); - padded_page* q = p->next; - my_queue.head_page.store(q, std::memory_order_relaxed); - if( !is_valid_page(q) ) { - my_queue.tail_page.store(nullptr, std::memory_order_relaxed); - } - } - my_queue.head_counter.store(my_ticket_type, std::memory_order_relaxed); - if ( is_valid_page(p) ) { - allocator_traits_type::destroy(allocator, static_cast<padded_page*>(p)); - allocator_traits_type::deallocate(allocator, static_cast<padded_page*>(p), 1); - } - } -private: - ticket_type my_ticket_type; - Container& my_queue; - padded_page* my_page; - Allocator& allocator; -}; // class micro_queue_pop_finalizer - -#if _MSC_VER && !defined(__INTEL_COMPILER) -// structure was padded due to alignment specifier -#pragma warning( push ) -#pragma warning( disable: 4324 ) -#endif - -template <typename T, typename Allocator> -struct concurrent_queue_rep { - using self_type = concurrent_queue_rep<T, Allocator>; - using size_type = std::size_t; - using micro_queue_type = micro_queue<T, Allocator>; - using allocator_type = Allocator; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - using padded_page = typename micro_queue_type::padded_page; - using page_allocator_type = typename micro_queue_type::page_allocator_type; - using item_constructor_type = typename micro_queue_type::item_constructor_type; -private: - using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; - using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<self_type>; - -public: - // must be power of 2 - static constexpr size_type n_queue = 8; - // Approximately n_queue/golden ratio - static constexpr size_type phi = 3; - static constexpr size_type item_size = micro_queue_type::item_size; - static constexpr size_type items_per_page = micro_queue_type::items_per_page; - - concurrent_queue_rep( queue_allocator_type& alloc ) : my_queue_allocator(alloc) - {} - - concurrent_queue_rep( const concurrent_queue_rep& ) = delete; - concurrent_queue_rep& operator=( const concurrent_queue_rep& ) = delete; - - void clear() { - page_allocator_type page_allocator(my_queue_allocator); - for (size_type i = 0; i < n_queue; ++i) { - padded_page* tail_page = array[i].get_tail_page(); - if( is_valid_page(tail_page) ) { - __TBB_ASSERT(array[i].get_head_page() == tail_page, "at most one page should remain" ); - page_allocator_traits::destroy(page_allocator, static_cast<padded_page*>(tail_page)); - page_allocator_traits::deallocate(page_allocator, static_cast<padded_page*>(tail_page), 1); - array[i].set_tail_page(nullptr); - } else { - __TBB_ASSERT(!is_valid_page(array[i].get_head_page()), "head page pointer corrupt?"); - } - } - } - - void assign( const concurrent_queue_rep& src, item_constructor_type construct_item ) { - head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); - tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); - n_invalid_entries.store(src.n_invalid_entries.load(std::memory_order_relaxed), std::memory_order_relaxed); - - // copy or move micro_queues - size_type queue_idx = 0; - try_call( [&] { - for (; queue_idx < n_queue; ++queue_idx) { - array[queue_idx].assign(src.array[queue_idx], *this, construct_item); - } - 
}).on_exception( [&] { - for (size_type i = 0; i < queue_idx + 1; ++i) { - array[i].clear(*this); - } - head_counter.store(0, std::memory_order_relaxed); - tail_counter.store(0, std::memory_order_relaxed); - n_invalid_entries.store(0, std::memory_order_relaxed); - }); - - __TBB_ASSERT(head_counter.load(std::memory_order_relaxed) == src.head_counter.load(std::memory_order_relaxed) && - tail_counter.load(std::memory_order_relaxed) == src.tail_counter.load(std::memory_order_relaxed), - "the source concurrent queue should not be concurrently modified." ); - } - - bool empty() const { - ticket_type tc = tail_counter.load(std::memory_order_acquire); - ticket_type hc = head_counter.load(std::memory_order_relaxed); - // if tc!=r.tail_counter, the queue was not empty at some point between the two reads. - return tc == tail_counter.load(std::memory_order_relaxed) && - std::ptrdiff_t(tc - hc - n_invalid_entries.load(std::memory_order_relaxed)) <= 0; - } - - std::ptrdiff_t size() const { - __TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), NULL); - std::ptrdiff_t hc = head_counter.load(std::memory_order_acquire); - std::ptrdiff_t tc = tail_counter.load(std::memory_order_relaxed); - std::ptrdiff_t nie = n_invalid_entries.load(std::memory_order_relaxed); - - return tc - hc - nie; - } - - queue_allocator_type& get_allocator() { - return my_queue_allocator; - } - - friend class micro_queue<T, Allocator>; - - // Map ticket_type to an array index - static size_type index( ticket_type k ) { - return k * phi % n_queue; - } - - micro_queue_type& choose( ticket_type k ) { - // The formula here approximates LRU in a cache-oblivious way. - return array[index(k)]; - } - - alignas(max_nfs_size) micro_queue_type array[n_queue]; - - alignas(max_nfs_size) std::atomic<ticket_type> head_counter{}; - alignas(max_nfs_size) std::atomic<ticket_type> tail_counter{}; - alignas(max_nfs_size) std::atomic<size_type> n_invalid_entries{}; - queue_allocator_type& my_queue_allocator; -}; // class concurrent_queue_rep - -#if _MSC_VER && !defined(__INTEL_COMPILER) -#pragma warning( pop ) -#endif - -template <typename Value, typename Allocator> -class concurrent_queue_iterator_base { - using queue_rep_type = concurrent_queue_rep<Value, Allocator>; - using padded_page = typename queue_rep_type::padded_page; -protected: - concurrent_queue_iterator_base() = default; - - concurrent_queue_iterator_base( const concurrent_queue_iterator_base& other ) { - assign(other); - } - - concurrent_queue_iterator_base( queue_rep_type* queue_rep ) - : my_queue_rep(queue_rep), - my_head_counter(my_queue_rep->head_counter.load(std::memory_order_relaxed)) - { - for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { - my_array[i] = my_queue_rep->array[i].get_head_page(); - } - - if (!get_item(my_item, my_head_counter)) advance(); - } - - void assign( const concurrent_queue_iterator_base& other ) { - my_item = other.my_item; - my_queue_rep = other.my_queue_rep; - - if (my_queue_rep != nullptr) { - my_head_counter = other.my_head_counter; - - for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { - my_array[i] = other.my_array[i]; - } - } - } - - void advance() { - __TBB_ASSERT(my_item, "Attempt to increment iterator past end of the queue"); - std::size_t k = my_head_counter; -#if TBB_USE_ASSERT - Value* tmp; - get_item(tmp, k); - __TBB_ASSERT(my_item == tmp, nullptr); -#endif - std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); - if (i == my_queue_rep->items_per_page - 1) { - padded_page*& root = 
my_array[queue_rep_type::index(k)]; - root = root->next; - } - // Advance k - my_head_counter = ++k; - if (!get_item(my_item, k)) advance(); - } - - concurrent_queue_iterator_base& operator=( const concurrent_queue_iterator_base& other ) { - this->assign(other); - return *this; - } - - bool get_item( Value*& item, std::size_t k ) { - if (k == my_queue_rep->tail_counter.load(std::memory_order_relaxed)) { - item = nullptr; - return true; - } else { - padded_page* p = my_array[queue_rep_type::index(k)]; - __TBB_ASSERT(p, nullptr); - std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); - item = &(*p)[i]; - return (p->mask & uintptr_t(1) << i) != 0; - } - } - - Value* my_item{ nullptr }; - queue_rep_type* my_queue_rep{ nullptr }; - ticket_type my_head_counter{}; - padded_page* my_array[queue_rep_type::n_queue]; -}; // class concurrent_queue_iterator_base - -struct concurrent_queue_iterator_provider { - template <typename Iterator, typename Container> - static Iterator get( const Container& container ) { - return Iterator(container); - } -}; // struct concurrent_queue_iterator_provider - -template <typename Container, typename Value, typename Allocator> -class concurrent_queue_iterator : public concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator> { - using base_type = concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator>; -public: - using value_type = Value; - using pointer = value_type*; - using reference = value_type&; - using difference_type = std::ptrdiff_t; - using iterator_category = std::forward_iterator_tag; - - concurrent_queue_iterator() = default; - - /** If Value==Container::value_type, then this routine is the copy constructor. - If Value==const Container::value_type, then this routine is a conversion constructor. */ - concurrent_queue_iterator( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) - : base_type(other) {} - -private: - concurrent_queue_iterator( const Container& container ) - : base_type(container.my_queue_representation) {} -public: - concurrent_queue_iterator& operator=( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) { - this->assign(other); - return *this; - } - - reference operator*() const { - return *static_cast<pointer>(this->my_item); - } - - pointer operator->() const { return &operator*(); } - - concurrent_queue_iterator& operator++() { - this->advance(); - return *this; - } - - concurrent_queue_iterator operator++(int) { - concurrent_queue_iterator tmp = *this; - ++*this; - return tmp; - } - - friend bool operator==( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { - return lhs.my_item == rhs.my_item; - } - - friend bool operator!=( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { - return lhs.my_item != rhs.my_item; - } -private: - friend struct concurrent_queue_iterator_provider; -}; // class concurrent_queue_iterator - -} // namespace d1 -} // namespace detail -} // tbb - -#endif // __TBB_detail__concurrent_queue_base_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__concurrent_queue_base_H +#define __TBB_detail__concurrent_queue_base_H + +#include "_utils.h" +#include "_exception.h" +#include "_machine.h" +#include "_allocator_traits.h" + +#include "../profiling.h" +#include "../spin_mutex.h" +#include "../cache_aligned_allocator.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +using ticket_type = std::size_t; + +template <typename Page> +inline bool is_valid_page(const Page p) { + return reinterpret_cast<std::uintptr_t>(p) > 1; +} + +template <typename T, typename Allocator> +struct concurrent_queue_rep; + +template <typename Container, typename T, typename Allocator> +class micro_queue_pop_finalizer; + +#if _MSC_VER && !defined(__INTEL_COMPILER) +// unary minus operator applied to unsigned type, result still unsigned +#pragma warning( push ) +#pragma warning( disable: 4146 ) +#endif + +// A queue using simple locking. +// For efficiency, this class has no constructor. +// The caller is expected to zero-initialize it. +template <typename T, typename Allocator> +class micro_queue { +private: + using queue_rep_type = concurrent_queue_rep<T, Allocator>; + using self_type = micro_queue<T, Allocator>; +public: + using size_type = std::size_t; + using value_type = T; + using reference = value_type&; + using const_reference = const value_type&; + + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + + static constexpr size_type item_size = sizeof(T); + static constexpr size_type items_per_page = item_size <= 8 ? 32 : + item_size <= 16 ? 16 : + item_size <= 32 ? 8 : + item_size <= 64 ? 4 : + item_size <= 128 ? 
2 : 1; + + struct padded_page { + padded_page() {} + ~padded_page() {} + + reference operator[] (std::size_t index) { + __TBB_ASSERT(index < items_per_page, "Index out of range"); + return items[index]; + } + + const_reference operator[] (std::size_t index) const { + __TBB_ASSERT(index < items_per_page, "Index out of range"); + return items[index]; + } + + padded_page* next{ nullptr }; + std::atomic<std::uintptr_t> mask{}; + + union { + value_type items[items_per_page]; + }; + }; // struct padded_page + + using page_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_page>; +protected: + using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; + +public: + using item_constructor_type = void (*)(value_type* location, const void* src); + micro_queue() = default; + micro_queue( const micro_queue& ) = delete; + micro_queue& operator=( const micro_queue& ) = delete; + + size_type prepare_page( ticket_type k, queue_rep_type& base, page_allocator_type page_allocator, + padded_page*& p ) { + __TBB_ASSERT(p == nullptr, "Invalid page argument for prepare_page"); + k &= -queue_rep_type::n_queue; + size_type index = modulo_power_of_two(k / queue_rep_type::n_queue, items_per_page); + if (!index) { + try_call( [&] { + p = page_allocator_traits::allocate(page_allocator, 1); + }).on_exception( [&] { + ++base.n_invalid_entries; + invalidate_page( k ); + }); + page_allocator_traits::construct(page_allocator, p); + } + + if (tail_counter.load(std::memory_order_relaxed) != k) spin_wait_until_my_turn(tail_counter, k, base); + call_itt_notify(acquired, &tail_counter); + + if (p) { + spin_mutex::scoped_lock lock( page_mutex ); + padded_page* q = tail_page.load(std::memory_order_relaxed); + if (is_valid_page(q)) { + q->next = p; + } else { + head_page.store(p, std::memory_order_relaxed); + } + tail_page.store(p, std::memory_order_relaxed);; + } else { + p = tail_page.load(std::memory_order_acquire); // TODO may be relaxed ? + } + return index; + } + + template<typename... Args> + void push( ticket_type k, queue_rep_type& base, Args&&... args ) + { + padded_page* p = nullptr; + page_allocator_type page_allocator(base.get_allocator()); + size_type index = prepare_page(k, base, page_allocator, p); + __TBB_ASSERT(p != nullptr, "Page was not prepared"); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + ++base.n_invalid_entries; + call_itt_notify(releasing, &tail_counter); + tail_counter.fetch_add(queue_rep_type::n_queue); + }); + + page_allocator_traits::construct(page_allocator, &(*p)[index], std::forward<Args>(args)...); + // If no exception was thrown, mark item as present. 
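The capacity ladder for items_per_page above packs more elements onto a page the smaller the element type is (32 slots for items up to 8 bytes, down to a single slot past 128 bytes). A minimal sketch that mirrors the same selection and checks it with static_asserts; the 40- and 256-byte sizes are hypothetical examples, not types from this header:

#include <cstddef>

// Mirrors the items_per_page ladder of micro_queue (illustrative copy, not the library's symbol).
constexpr std::size_t items_per_page_for(std::size_t item_size) {
    return item_size <= 8   ? 32 :
           item_size <= 16  ? 16 :
           item_size <= 32  ?  8 :
           item_size <= 64  ?  4 :
           item_size <= 128 ?  2 : 1;
}

static_assert(items_per_page_for(sizeof(int)) == 32, "small items: 32 slots per page");
static_assert(items_per_page_for(40)  == 4, "a 40-byte item: 4 slots per page");
static_assert(items_per_page_for(256) == 1, "oversized items: one slot per page");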
+ p->mask.store(p->mask.load(std::memory_order_relaxed) | uintptr_t(1) << index, std::memory_order_relaxed); + call_itt_notify(releasing, &tail_counter); + + value_guard.dismiss(); + tail_counter.fetch_add(queue_rep_type::n_queue); + } + + void abort_push( ticket_type k, queue_rep_type& base) { + padded_page* p = nullptr; + prepare_page(k, base, base.get_allocator(), p); + ++base.n_invalid_entries; + tail_counter.fetch_add(queue_rep_type::n_queue); + } + + bool pop( void* dst, ticket_type k, queue_rep_type& base ) { + k &= -queue_rep_type::n_queue; + if (head_counter.load(std::memory_order_relaxed) != k) spin_wait_until_eq(head_counter, k); + call_itt_notify(acquired, &head_counter); + if (tail_counter.load(std::memory_order_relaxed) == k) spin_wait_while_eq(tail_counter, k); + call_itt_notify(acquired, &tail_counter); + padded_page *p = head_page.load(std::memory_order_acquire); + __TBB_ASSERT( p, nullptr ); + size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page ); + bool success = false; + { + page_allocator_type page_allocator(base.get_allocator()); + micro_queue_pop_finalizer<self_type, value_type, page_allocator_type> finalizer(*this, page_allocator, + k + queue_rep_type::n_queue, index == items_per_page - 1 ? p : nullptr ); + if (p->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) { + success = true; + assign_and_destroy_item( dst, *p, index ); + } else { + --base.n_invalid_entries; + } + } + return success; + } + + micro_queue& assign( const micro_queue& src, queue_rep_type& base, + item_constructor_type construct_item ) + { + head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + + const padded_page* srcp = src.head_page.load(std::memory_order_relaxed); + if( is_valid_page(srcp) ) { + ticket_type g_index = head_counter.load(std::memory_order_relaxed); + size_type n_items = (tail_counter.load(std::memory_order_relaxed) - head_counter.load(std::memory_order_relaxed)) + / queue_rep_type::n_queue; + size_type index = modulo_power_of_two(head_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); + size_type end_in_first_page = (index+n_items < items_per_page) ? 
(index + n_items) : items_per_page; + + try_call( [&] { + head_page.store(make_copy(base, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed); + }).on_exception( [&] { + head_counter.store(0, std::memory_order_relaxed); + tail_counter.store(0, std::memory_order_relaxed); + }); + padded_page* cur_page = head_page.load(std::memory_order_relaxed); + + try_call( [&] { + if (srcp != src.tail_page.load(std::memory_order_relaxed)) { + for (srcp = srcp->next; srcp != src.tail_page.load(std::memory_order_relaxed); srcp=srcp->next ) { + cur_page->next = make_copy( base, srcp, 0, items_per_page, g_index, construct_item ); + cur_page = cur_page->next; + } + + __TBB_ASSERT(srcp == src.tail_page.load(std::memory_order_relaxed), nullptr ); + size_type last_index = modulo_power_of_two(tail_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); + if( last_index==0 ) last_index = items_per_page; + + cur_page->next = make_copy( base, srcp, 0, last_index, g_index, construct_item ); + cur_page = cur_page->next; + } + tail_page.store(cur_page, std::memory_order_relaxed); + }).on_exception( [&] { + padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + tail_page.store(invalid_page, std::memory_order_relaxed); + }); + } else { + head_page.store(nullptr, std::memory_order_relaxed); + tail_page.store(nullptr, std::memory_order_relaxed); + } + return *this; + } + + padded_page* make_copy( queue_rep_type& base, const padded_page* src_page, size_type begin_in_page, + size_type end_in_page, ticket_type& g_index, item_constructor_type construct_item ) + { + page_allocator_type page_allocator(base.get_allocator()); + padded_page* new_page = page_allocator_traits::allocate(page_allocator, 1); + new_page->next = nullptr; + new_page->mask.store(src_page->mask.load(std::memory_order_relaxed), std::memory_order_relaxed); + for (; begin_in_page!=end_in_page; ++begin_in_page, ++g_index) { + if (new_page->mask.load(std::memory_order_relaxed) & uintptr_t(1) << begin_in_page) { + copy_item(*new_page, begin_in_page, *src_page, begin_in_page, construct_item); + } + } + return new_page; + } + + void invalidate_page( ticket_type k ) { + // Append an invalid page at address 1 so that no more pushes are allowed. 
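Stepping back from the page-invalidation path for a moment: all of this per-page machinery sits behind a much simpler public interface. A hedged usage sketch of the unbounded queue built on it (assuming a standard oneTBB installation; tbb::concurrent_queue is the public class, declared elsewhere, not in this header):

#include <oneapi/tbb/concurrent_queue.h>
#include <iostream>

int main() {
    tbb::concurrent_queue<int> q;        // unbounded, multiple producers and consumers allowed
    for (int i = 0; i < 4; ++i) q.push(i);

    int value = 0;
    while (q.try_pop(value))             // try_pop returns false once the queue is empty
        std::cout << value << ' ';
    std::cout << '\n';
    return 0;
}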
+ padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + { + spin_mutex::scoped_lock lock( page_mutex ); + tail_counter.store(k + queue_rep_type::n_queue + 1, std::memory_order_relaxed); + padded_page* q = tail_page.load(std::memory_order_relaxed); + if (is_valid_page(q)) { + q->next = invalid_page; + } else { + head_page.store(invalid_page, std::memory_order_relaxed); + } + tail_page.store(invalid_page, std::memory_order_relaxed); + } + } + + padded_page* get_tail_page() { + return tail_page.load(std::memory_order_relaxed); + } + + padded_page* get_head_page() { + return head_page.load(std::memory_order_relaxed); + } + + void set_tail_page( padded_page* pg ) { + tail_page.store(pg, std::memory_order_relaxed); + } + + void clear(queue_rep_type& base) { + padded_page* curr_page = head_page.load(std::memory_order_relaxed); + std::size_t index = head_counter.load(std::memory_order_relaxed); + page_allocator_type page_allocator(base.get_allocator()); + + while (curr_page) { + for (; index != items_per_page - 1; ++index) { + curr_page->operator[](index).~value_type(); + } + padded_page* next_page = curr_page->next; + page_allocator_traits::destroy(page_allocator, curr_page); + page_allocator_traits::deallocate(page_allocator, curr_page, 1); + curr_page = next_page; + } + + padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + head_page.store(invalid_page, std::memory_order_relaxed); + tail_page.store(invalid_page, std::memory_order_relaxed); + } + +private: + // template <typename U, typename A> + friend class micro_queue_pop_finalizer<self_type, value_type, page_allocator_type>; + + // Class used to ensure exception-safety of method "pop" + class destroyer { + value_type& my_value; + public: + destroyer( reference value ) : my_value(value) {} + destroyer( const destroyer& ) = delete; + destroyer& operator=( const destroyer& ) = delete; + ~destroyer() {my_value.~T();} + }; // class destroyer + + void copy_item( padded_page& dst, size_type dindex, const padded_page& src, size_type sindex, + item_constructor_type construct_item ) + { + auto& src_item = src[sindex]; + construct_item( &dst[dindex], static_cast<const void*>(&src_item) ); + } + + void assign_and_destroy_item( void* dst, padded_page& src, size_type index ) { + auto& from = src[index]; + destroyer d(from); + *static_cast<T*>(dst) = std::move(from); + } + + void spin_wait_until_my_turn( std::atomic<ticket_type>& counter, ticket_type k, queue_rep_type& rb ) const { + for (atomic_backoff b(true);; b.pause()) { + ticket_type c = counter; + if (c == k) return; + else if (c & 1) { + ++rb.n_invalid_entries; + throw_exception( exception_id::bad_last_alloc); + } + } + } + + std::atomic<padded_page*> head_page{}; + std::atomic<ticket_type> head_counter{}; + + std::atomic<padded_page*> tail_page{}; + std::atomic<ticket_type> tail_counter{}; + + spin_mutex page_mutex{}; +}; // class micro_queue + +#if _MSC_VER && !defined(__INTEL_COMPILER) +#pragma warning( pop ) +#endif // warning 4146 is back + +template <typename Container, typename T, typename Allocator> +class micro_queue_pop_finalizer { +public: + using padded_page = typename Container::padded_page; + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + + micro_queue_pop_finalizer( Container& queue, Allocator& alloc, ticket_type k, padded_page* p ) : + my_ticket_type(k), my_queue(queue), my_page(p), allocator(alloc) + {} + + micro_queue_pop_finalizer( const 
micro_queue_pop_finalizer& ) = delete; + micro_queue_pop_finalizer& operator=( const micro_queue_pop_finalizer& ) = delete; + + ~micro_queue_pop_finalizer() { + padded_page* p = my_page; + if( is_valid_page(p) ) { + spin_mutex::scoped_lock lock( my_queue.page_mutex ); + padded_page* q = p->next; + my_queue.head_page.store(q, std::memory_order_relaxed); + if( !is_valid_page(q) ) { + my_queue.tail_page.store(nullptr, std::memory_order_relaxed); + } + } + my_queue.head_counter.store(my_ticket_type, std::memory_order_relaxed); + if ( is_valid_page(p) ) { + allocator_traits_type::destroy(allocator, static_cast<padded_page*>(p)); + allocator_traits_type::deallocate(allocator, static_cast<padded_page*>(p), 1); + } + } +private: + ticket_type my_ticket_type; + Container& my_queue; + padded_page* my_page; + Allocator& allocator; +}; // class micro_queue_pop_finalizer + +#if _MSC_VER && !defined(__INTEL_COMPILER) +// structure was padded due to alignment specifier +#pragma warning( push ) +#pragma warning( disable: 4324 ) +#endif + +template <typename T, typename Allocator> +struct concurrent_queue_rep { + using self_type = concurrent_queue_rep<T, Allocator>; + using size_type = std::size_t; + using micro_queue_type = micro_queue<T, Allocator>; + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using padded_page = typename micro_queue_type::padded_page; + using page_allocator_type = typename micro_queue_type::page_allocator_type; + using item_constructor_type = typename micro_queue_type::item_constructor_type; +private: + using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<self_type>; + +public: + // must be power of 2 + static constexpr size_type n_queue = 8; + // Approximately n_queue/golden ratio + static constexpr size_type phi = 3; + static constexpr size_type item_size = micro_queue_type::item_size; + static constexpr size_type items_per_page = micro_queue_type::items_per_page; + + concurrent_queue_rep( queue_allocator_type& alloc ) : my_queue_allocator(alloc) + {} + + concurrent_queue_rep( const concurrent_queue_rep& ) = delete; + concurrent_queue_rep& operator=( const concurrent_queue_rep& ) = delete; + + void clear() { + page_allocator_type page_allocator(my_queue_allocator); + for (size_type i = 0; i < n_queue; ++i) { + padded_page* tail_page = array[i].get_tail_page(); + if( is_valid_page(tail_page) ) { + __TBB_ASSERT(array[i].get_head_page() == tail_page, "at most one page should remain" ); + page_allocator_traits::destroy(page_allocator, static_cast<padded_page*>(tail_page)); + page_allocator_traits::deallocate(page_allocator, static_cast<padded_page*>(tail_page), 1); + array[i].set_tail_page(nullptr); + } else { + __TBB_ASSERT(!is_valid_page(array[i].get_head_page()), "head page pointer corrupt?"); + } + } + } + + void assign( const concurrent_queue_rep& src, item_constructor_type construct_item ) { + head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + n_invalid_entries.store(src.n_invalid_entries.load(std::memory_order_relaxed), std::memory_order_relaxed); + + // copy or move micro_queues + size_type queue_idx = 0; + try_call( [&] { + for (; queue_idx < n_queue; ++queue_idx) { + array[queue_idx].assign(src.array[queue_idx], *this, construct_item); + } + 
}).on_exception( [&] { + for (size_type i = 0; i < queue_idx + 1; ++i) { + array[i].clear(*this); + } + head_counter.store(0, std::memory_order_relaxed); + tail_counter.store(0, std::memory_order_relaxed); + n_invalid_entries.store(0, std::memory_order_relaxed); + }); + + __TBB_ASSERT(head_counter.load(std::memory_order_relaxed) == src.head_counter.load(std::memory_order_relaxed) && + tail_counter.load(std::memory_order_relaxed) == src.tail_counter.load(std::memory_order_relaxed), + "the source concurrent queue should not be concurrently modified." ); + } + + bool empty() const { + ticket_type tc = tail_counter.load(std::memory_order_acquire); + ticket_type hc = head_counter.load(std::memory_order_relaxed); + // if tc!=r.tail_counter, the queue was not empty at some point between the two reads. + return tc == tail_counter.load(std::memory_order_relaxed) && + std::ptrdiff_t(tc - hc - n_invalid_entries.load(std::memory_order_relaxed)) <= 0; + } + + std::ptrdiff_t size() const { + __TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), NULL); + std::ptrdiff_t hc = head_counter.load(std::memory_order_acquire); + std::ptrdiff_t tc = tail_counter.load(std::memory_order_relaxed); + std::ptrdiff_t nie = n_invalid_entries.load(std::memory_order_relaxed); + + return tc - hc - nie; + } + + queue_allocator_type& get_allocator() { + return my_queue_allocator; + } + + friend class micro_queue<T, Allocator>; + + // Map ticket_type to an array index + static size_type index( ticket_type k ) { + return k * phi % n_queue; + } + + micro_queue_type& choose( ticket_type k ) { + // The formula here approximates LRU in a cache-oblivious way. + return array[index(k)]; + } + + alignas(max_nfs_size) micro_queue_type array[n_queue]; + + alignas(max_nfs_size) std::atomic<ticket_type> head_counter{}; + alignas(max_nfs_size) std::atomic<ticket_type> tail_counter{}; + alignas(max_nfs_size) std::atomic<size_type> n_invalid_entries{}; + queue_allocator_type& my_queue_allocator; +}; // class concurrent_queue_rep + +#if _MSC_VER && !defined(__INTEL_COMPILER) +#pragma warning( pop ) +#endif + +template <typename Value, typename Allocator> +class concurrent_queue_iterator_base { + using queue_rep_type = concurrent_queue_rep<Value, Allocator>; + using padded_page = typename queue_rep_type::padded_page; +protected: + concurrent_queue_iterator_base() = default; + + concurrent_queue_iterator_base( const concurrent_queue_iterator_base& other ) { + assign(other); + } + + concurrent_queue_iterator_base( queue_rep_type* queue_rep ) + : my_queue_rep(queue_rep), + my_head_counter(my_queue_rep->head_counter.load(std::memory_order_relaxed)) + { + for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { + my_array[i] = my_queue_rep->array[i].get_head_page(); + } + + if (!get_item(my_item, my_head_counter)) advance(); + } + + void assign( const concurrent_queue_iterator_base& other ) { + my_item = other.my_item; + my_queue_rep = other.my_queue_rep; + + if (my_queue_rep != nullptr) { + my_head_counter = other.my_head_counter; + + for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { + my_array[i] = other.my_array[i]; + } + } + } + + void advance() { + __TBB_ASSERT(my_item, "Attempt to increment iterator past end of the queue"); + std::size_t k = my_head_counter; +#if TBB_USE_ASSERT + Value* tmp; + get_item(tmp, k); + __TBB_ASSERT(my_item == tmp, nullptr); +#endif + std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); + if (i == my_queue_rep->items_per_page - 1) { + padded_page*& root = 
my_array[queue_rep_type::index(k)]; + root = root->next; + } + // Advance k + my_head_counter = ++k; + if (!get_item(my_item, k)) advance(); + } + + concurrent_queue_iterator_base& operator=( const concurrent_queue_iterator_base& other ) { + this->assign(other); + return *this; + } + + bool get_item( Value*& item, std::size_t k ) { + if (k == my_queue_rep->tail_counter.load(std::memory_order_relaxed)) { + item = nullptr; + return true; + } else { + padded_page* p = my_array[queue_rep_type::index(k)]; + __TBB_ASSERT(p, nullptr); + std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); + item = &(*p)[i]; + return (p->mask & uintptr_t(1) << i) != 0; + } + } + + Value* my_item{ nullptr }; + queue_rep_type* my_queue_rep{ nullptr }; + ticket_type my_head_counter{}; + padded_page* my_array[queue_rep_type::n_queue]; +}; // class concurrent_queue_iterator_base + +struct concurrent_queue_iterator_provider { + template <typename Iterator, typename Container> + static Iterator get( const Container& container ) { + return Iterator(container); + } +}; // struct concurrent_queue_iterator_provider + +template <typename Container, typename Value, typename Allocator> +class concurrent_queue_iterator : public concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator> { + using base_type = concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator>; +public: + using value_type = Value; + using pointer = value_type*; + using reference = value_type&; + using difference_type = std::ptrdiff_t; + using iterator_category = std::forward_iterator_tag; + + concurrent_queue_iterator() = default; + + /** If Value==Container::value_type, then this routine is the copy constructor. + If Value==const Container::value_type, then this routine is a conversion constructor. 
*/ + concurrent_queue_iterator( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) + : base_type(other) {} + +private: + concurrent_queue_iterator( const Container& container ) + : base_type(container.my_queue_representation) {} +public: + concurrent_queue_iterator& operator=( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) { + this->assign(other); + return *this; + } + + reference operator*() const { + return *static_cast<pointer>(this->my_item); + } + + pointer operator->() const { return &operator*(); } + + concurrent_queue_iterator& operator++() { + this->advance(); + return *this; + } + + concurrent_queue_iterator operator++(int) { + concurrent_queue_iterator tmp = *this; + ++*this; + return tmp; + } + + friend bool operator==( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { + return lhs.my_item == rhs.my_item; + } + + friend bool operator!=( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { + return lhs.my_item != rhs.my_item; + } +private: + friend struct concurrent_queue_iterator_provider; +}; // class concurrent_queue_iterator + +} // namespace d1 +} // namespace detail +} // tbb + +#endif // __TBB_detail__concurrent_queue_base_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h index c4d4c627e0..734e9ac3d5 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h @@ -1,1252 +1,1252 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__concurrent_skip_list_H -#define __TBB_detail__concurrent_skip_list_H - -#if !defined(__TBB_concurrent_map_H) && !defined(__TBB_concurrent_set_H) -#error Do not #include this internal file directly; use public TBB headers instead. 
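As the #error above says, this header is internal and is reached only through the public headers. A minimal, hedged sketch of the ordered containers built on the skip list defined below (assuming a standard oneTBB installation linked against tbb):

#include <oneapi/tbb/concurrent_set.h>
#include <oneapi/tbb/concurrent_map.h>
#include <iostream>

int main() {
    // Both containers are backed by the concurrent skip list in this header.
    tbb::concurrent_set<int> ordered_ints{3, 1, 2};
    tbb::concurrent_map<int, const char*> names;
    names.emplace(1, "one");

    for (int v : ordered_ints) std::cout << v << ' ';   // prints 1 2 3: iteration follows key order
    std::cout << '\n' << names.find(1)->second << '\n';
    return 0;
}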
-#endif - -#include "_config.h" -#include "_range_common.h" -#include "_allocator_traits.h" -#include "_template_helpers.h" -#include "_node_handle.h" -#include "_containers_helpers.h" -#include "_assert.h" -#include "_exception.h" -#include "../enumerable_thread_specific.h" -#include <utility> -#include <initializer_list> -#include <atomic> -#include <array> -#include <type_traits> -#include <random> // Need std::geometric_distribution -#include <algorithm> // Need std::equal and std::lexicographical_compare -#include <cstdint> -#if __TBB_CPP20_COMPARISONS_PRESENT -#include <compare> -#endif - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(push) -#pragma warning(disable: 4127) // warning C4127: conditional expression is constant -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Value, typename Allocator> -class skip_list_node { - using node_ptr = skip_list_node*; -public: - using value_type = Value; - using atomic_node_ptr = std::atomic<node_ptr>; - using size_type = std::size_t; - using container_allocator_type = Allocator; - - using reference = value_type&; - using const_reference = const value_type&; -private: - using allocator_traits = tbb::detail::allocator_traits<container_allocator_type>; - - // Allocator is the same as the container allocator=> allocates unitptr_t - // It is required to rebind it to value_type to get the correct pointer and const_pointer - using value_allocator_traits = typename allocator_traits::template rebind_traits<value_type>; -public: - using pointer = typename value_allocator_traits::pointer; - using const_pointer = typename value_allocator_traits::const_pointer; - - skip_list_node( size_type levels, container_allocator_type& alloc ) - : my_container_allocator(alloc), my_height(levels), my_index_number(0) - { - for (size_type l = 0; l < my_height; ++l) { - allocator_traits::construct(my_container_allocator, &get_atomic_next(l), nullptr); - } - } - - ~skip_list_node() { - for (size_type l = 0; l < my_height; ++l) { - allocator_traits::destroy(my_container_allocator, &get_atomic_next(l)); - } - } - - skip_list_node( const skip_list_node& ) = delete; - skip_list_node( skip_list_node&& ) = delete; - skip_list_node& operator=( const skip_list_node& ) = delete; - skip_list_node& operator=( skip_list_node&& ) = delete; - - pointer storage() { - return &my_value; - } - - reference value() { - return *storage(); - } - - node_ptr next( size_type level ) const { - node_ptr res = get_atomic_next(level).load(std::memory_order_acquire); - __TBB_ASSERT(res == nullptr || res->height() > level, "Broken internal structure"); - return res; - } - - atomic_node_ptr& atomic_next( size_type level ) { - atomic_node_ptr& res = get_atomic_next(level); -#if TBB_USE_DEBUG - node_ptr node = res.load(std::memory_order_acquire); - __TBB_ASSERT(node == nullptr || node->height() > level, "Broken internal structure"); -#endif - return res; - } - - void set_next( size_type level, node_ptr n ) { - __TBB_ASSERT(n == nullptr || n->height() > level, "Broken internal structure"); - get_atomic_next(level).store(n, std::memory_order_relaxed); - } - - size_type height() const { - return my_height; - } - - void set_index_number( size_type index_num ) { - my_index_number = index_num; - } - - size_type index_number() const { - return my_index_number; - } - -private: - atomic_node_ptr& get_atomic_next( size_type level ) { - atomic_node_ptr* arr = reinterpret_cast<atomic_node_ptr*>(this + 1); - return arr[level]; - } - - const atomic_node_ptr& 
get_atomic_next( size_type level ) const { - const atomic_node_ptr* arr = reinterpret_cast<const atomic_node_ptr*>(this + 1); - return arr[level]; - } - - container_allocator_type& my_container_allocator; - union { - value_type my_value; - }; - size_type my_height; - size_type my_index_number; -}; // class skip_list_node - -template <typename NodeType, typename ValueType> -class skip_list_iterator { - using node_type = NodeType; - using node_ptr = node_type*; -public: - using iterator_category = std::forward_iterator_tag; - using value_type = ValueType; - - using difference_type = std::ptrdiff_t; - using pointer = value_type*; - using reference = value_type&; - - skip_list_iterator() : skip_list_iterator(nullptr) {} - - skip_list_iterator( const skip_list_iterator<node_type, typename node_type::value_type>& other ) - : my_node_ptr(other.my_node_ptr) {} - - skip_list_iterator& operator=( const skip_list_iterator<node_type, typename node_type::value_type>& other ) { - my_node_ptr = other.my_node_ptr; - return *this; - } - - reference operator*() const { return my_node_ptr->value(); } - pointer operator->() const { return my_node_ptr->storage(); } - - skip_list_iterator& operator++() { - __TBB_ASSERT(my_node_ptr != nullptr, nullptr); - my_node_ptr = my_node_ptr->next(0); - return *this; - } - - skip_list_iterator operator++(int) { - skip_list_iterator tmp = *this; - ++*this; - return tmp; - } - -private: - skip_list_iterator(node_type* n) : my_node_ptr(n) {} - - node_ptr my_node_ptr; - - template <typename Traits> - friend class concurrent_skip_list; - - template <typename N, typename V> - friend class skip_list_iterator; - - friend class const_range; - friend class range; - - friend bool operator==( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { - return lhs.my_node_ptr == rhs.my_node_ptr; - } - - friend bool operator!=( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { - return lhs.my_node_ptr != rhs.my_node_ptr; - } -}; // class skip_list_iterator - -template <typename Traits> -class concurrent_skip_list { -protected: - using container_traits = Traits; - using self_type = concurrent_skip_list<container_traits>; - using allocator_type = typename container_traits::allocator_type; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - using key_compare = typename container_traits::compare_type; - using value_compare = typename container_traits::value_compare; - using key_type = typename container_traits::key_type; - using value_type = typename container_traits::value_type; - static_assert(std::is_same<value_type, typename allocator_type::value_type>::value, - "value_type of the container should be the same as its allocator"); - - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - static constexpr size_type max_level = container_traits::max_level; - - using node_allocator_type = typename allocator_traits_type::template rebind_alloc<std::uint8_t>; - using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; - - using list_node_type = skip_list_node<value_type, node_allocator_type>; - using node_type = node_handle<key_type, value_type, list_node_type, allocator_type>; - - using iterator = skip_list_iterator<list_node_type, value_type>; - using const_iterator = skip_list_iterator<list_node_type, const value_type>; - - using reference = value_type&; - using const_reference = const value_type&; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename 
allocator_traits_type::const_pointer; - - using random_level_generator_type = typename container_traits::random_level_generator_type; - - using node_ptr = list_node_type*; - - using array_type = std::array<node_ptr, max_level>; -private: - template <typename T> - using is_transparent = dependent_bool<comp_is_transparent<key_compare>, T>; -public: - static constexpr bool allow_multimapping = container_traits::allow_multimapping; - - concurrent_skip_list() : my_head_ptr(nullptr), my_size(0), my_max_height(0) {} - - explicit concurrent_skip_list( const key_compare& comp, const allocator_type& alloc = allocator_type() ) - : my_node_allocator(alloc), my_compare(comp), my_head_ptr(nullptr), my_size(0), my_max_height(0) {} - - explicit concurrent_skip_list( const allocator_type& alloc ) - : concurrent_skip_list(key_compare(), alloc) {} - - template<typename InputIterator> - concurrent_skip_list( InputIterator first, InputIterator last, const key_compare& comp = key_compare(), - const allocator_type& alloc = allocator_type() ) - : concurrent_skip_list(comp, alloc) - { - internal_copy(first, last); - } - - template <typename InputIterator> - concurrent_skip_list( InputIterator first, InputIterator last, const allocator_type& alloc ) - : concurrent_skip_list(first, last, key_compare(), alloc) {} - - concurrent_skip_list( std::initializer_list<value_type> init, const key_compare& comp = key_compare(), - const allocator_type& alloc = allocator_type() ) - : concurrent_skip_list(init.begin(), init.end(), comp, alloc) {} - - concurrent_skip_list( std::initializer_list<value_type> init, const allocator_type& alloc ) - : concurrent_skip_list(init, key_compare(), alloc) {} - - concurrent_skip_list( const concurrent_skip_list& other ) - : my_node_allocator(node_allocator_traits::select_on_container_copy_construction(other.get_allocator())), - my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), - my_size(0), my_max_height(0) - { - internal_copy(other); - __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); - } - - concurrent_skip_list( const concurrent_skip_list& other, const allocator_type& alloc ) - : my_node_allocator(alloc), my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), - my_size(0), my_max_height(0) - { - internal_copy(other); - __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); - } - - concurrent_skip_list( concurrent_skip_list&& other ) - : my_node_allocator(std::move(other.my_node_allocator)), my_compare(other.my_compare), - my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) // my_head_ptr would be stored in internal_move - { - internal_move(std::move(other)); - } - - concurrent_skip_list( concurrent_skip_list&& other, const allocator_type& alloc ) - : my_node_allocator(alloc), my_compare(other.my_compare), - my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) - { - using is_always_equal = typename allocator_traits_type::is_always_equal; - internal_move_construct_with_allocator(std::move(other), is_always_equal()); - } - - ~concurrent_skip_list() { - clear(); - node_ptr head = my_head_ptr.load(std::memory_order_relaxed); - if (head != nullptr) { - delete_node(head); - } - } - - concurrent_skip_list& operator=( const concurrent_skip_list& other ) { - if (this != &other) { - clear(); - copy_assign_allocators(my_node_allocator, other.my_node_allocator); - my_compare = other.my_compare; - my_rng = other.my_rng; - internal_copy(other); - } - return *this; - } - - concurrent_skip_list& 
operator=( concurrent_skip_list&& other ) { - if (this != &other) { - clear(); - my_compare = std::move(other.my_compare); - my_rng = std::move(other.my_rng); - - move_assign_allocators(my_node_allocator, other.my_node_allocator); - using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; - using is_always_equal = typename node_allocator_traits::is_always_equal; - internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); - } - return *this; - } - - concurrent_skip_list& operator=( std::initializer_list<value_type> il ) - { - clear(); - insert(il.begin(),il.end()); - return *this; - } - - std::pair<iterator, bool> insert( const value_type& value ) { - return internal_insert(value); - } - - std::pair<iterator, bool> insert( value_type&& value ) { - return internal_insert(std::move(value)); - } - - iterator insert( const_iterator, const_reference value ) { - // Ignore hint - return insert(value).first; - } - - iterator insert( const_iterator, value_type&& value ) { - // Ignore hint - return insert(std::move(value)).first; - } - - template<typename InputIterator> - void insert( InputIterator first, InputIterator last ) { - while (first != last) { - insert(*first); - ++first; - } - } - - void insert( std::initializer_list<value_type> init ) { - insert(init.begin(), init.end()); - } - - std::pair<iterator, bool> insert( node_type&& nh ) { - if (!nh.empty()) { - auto insert_node = node_handle_accessor::get_node_ptr(nh); - std::pair<iterator, bool> insert_result = internal_insert_node(insert_node); - if (insert_result.second) { - node_handle_accessor::deactivate(nh); - } - return insert_result; - } - return std::pair<iterator, bool>(end(), false); - } - - iterator insert( const_iterator, node_type&& nh ) { - // Ignore hint - return insert(std::move(nh)).first; - } - - template<typename... Args> - std::pair<iterator, bool> emplace( Args&&... args ) { - return internal_insert(std::forward<Args>(args)...); - } - - template<typename... Args> - iterator emplace_hint( const_iterator, Args&&... args ) { - // Ignore hint - return emplace(std::forward<Args>(args)...).first; - } - - iterator unsafe_erase( iterator pos ) { - std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); - if (extract_result.first) { // node was extracted - delete_value_node(extract_result.first); - return extract_result.second; - } - return end(); - } - - iterator unsafe_erase( const_iterator pos ) { - return unsafe_erase(get_iterator(pos)); - } - - iterator unsafe_erase( const_iterator first, const_iterator last ) { - while (first != last) { - // Unsafe erase returns the iterator which follows the erased one - first = unsafe_erase(first); - } - return get_iterator(first); - } - - size_type unsafe_erase( const key_type& key ) { - return internal_erase(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value - && !std::is_convertible<K, const_iterator>::value - && !std::is_convertible<K, iterator>::value, - size_type>::type unsafe_erase( const K& key ) - { - return internal_erase(key); - } - - node_type unsafe_extract( const_iterator pos ) { - std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); - return extract_result.first ? 
node_handle_accessor::construct<node_type>(extract_result.first) : node_type(); - } - - node_type unsafe_extract( iterator pos ) { - return unsafe_extract(const_iterator(pos)); - } - - node_type unsafe_extract( const key_type& key ) { - return unsafe_extract(find(key)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value - && !std::is_convertible<K, const_iterator>::value - && !std::is_convertible<K, iterator>::value, - node_type>::type unsafe_extract( const K& key ) - { - return unsafe_extract(find(key)); - } - - iterator lower_bound( const key_type& key ) { - return iterator(internal_get_bound(key, my_compare)); - } - - const_iterator lower_bound( const key_type& key ) const { - return const_iterator(internal_get_bound(key, my_compare)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, iterator>::type lower_bound( const K& key ) { - return iterator(internal_get_bound(key, my_compare)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, const_iterator>::type lower_bound( const K& key ) const { - return const_iterator(internal_get_bound(key, my_compare)); - } - - iterator upper_bound( const key_type& key ) { - return iterator(internal_get_bound(key, not_greater_compare(my_compare))); - } - - const_iterator upper_bound( const key_type& key ) const { - return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, iterator>::type upper_bound( const K& key ) { - return iterator(internal_get_bound(key, not_greater_compare(my_compare))); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, const_iterator>::type upper_bound( const K& key ) const { - return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); - } - - iterator find( const key_type& key ) { - return iterator(internal_find(key)); - } - - const_iterator find( const key_type& key ) const { - return const_iterator(internal_find(key)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { - return iterator(internal_find(key)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { - return const_iterator(internal_find(key)); - } - - size_type count( const key_type& key ) const { - return internal_count(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { - return internal_count(key); - } - - bool contains( const key_type& key ) const { - return find(key) != end(); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { - return find(key) != end(); - } - - void clear() noexcept { - // clear is not thread safe - load can be relaxed - node_ptr head = my_head_ptr.load(std::memory_order_relaxed); - - if (head == nullptr) return; // Head is not allocated => container is empty - - node_ptr current = head->next(0); - - // Delete all value nodes in the container - while (current) { - node_ptr next = current->next(0); - delete_value_node(current); - current = next; - } - - for (size_type level = 0; level < head->height(); ++level) { - head->set_next(level, nullptr); - } - - my_size.store(0, std::memory_order_relaxed); - my_max_height.store(0, std::memory_order_relaxed); - } - - iterator begin() { - return 
iterator(internal_begin()); - } - - const_iterator begin() const { - return const_iterator(internal_begin()); - } - - const_iterator cbegin() const { - return const_iterator(internal_begin()); - } - - iterator end() { - return iterator(nullptr); - } - - const_iterator end() const { - return const_iterator(nullptr); - } - - const_iterator cend() const { - return const_iterator(nullptr); - } - - size_type size() const { - return my_size.load(std::memory_order_relaxed); - } - - size_type max_size() const { - return node_allocator_traits::max_size(my_node_allocator); - } - - __TBB_nodiscard bool empty() const { - return 0 == size(); - } - - allocator_type get_allocator() const { - return my_node_allocator; - } - - void swap(concurrent_skip_list& other) { - if (this != &other) { - using pocs_type = typename node_allocator_traits::propagate_on_container_swap; - using is_always_equal = typename node_allocator_traits::is_always_equal; - internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); - } - } - - std::pair<iterator, iterator> equal_range(const key_type& key) { - return internal_equal_range(key); - } - - std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { - return internal_equal_range(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { - return internal_equal_range(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { - return internal_equal_range(key); - } - - key_compare key_comp() const { return my_compare; } - - value_compare value_comp() const { return container_traits::value_comp(my_compare); } - - class const_range_type { - public: - using size_type = typename concurrent_skip_list::size_type; - using value_type = typename concurrent_skip_list::value_type; - using iterator = typename concurrent_skip_list::const_iterator; - - bool empty() const { - return my_begin.my_node_ptr->next(0) == my_end.my_node_ptr; - } - - bool is_divisible() const { - return my_level != 0 ? 
my_begin.my_node_ptr->next(my_level - 1) != my_end.my_node_ptr : false; - } - - size_type size() const { return std::distance(my_begin, my_end); } - - const_range_type( const_range_type& r, split) - : my_end(r.my_end) { - my_begin = iterator(r.my_begin.my_node_ptr->next(r.my_level - 1)); - my_level = my_begin.my_node_ptr->height(); - r.my_end = my_begin; - } - - const_range_type( const concurrent_skip_list& l) - : my_end(l.end()), my_begin(l.begin()), my_level(my_begin.my_node_ptr->height() ) {} - - iterator begin() const { return my_begin; } - iterator end() const { return my_end; } - size_type grainsize() const { return 1; } - - private: - const_iterator my_end; - const_iterator my_begin; - size_type my_level; - }; // class const_range_type - - class range_type : public const_range_type { - public: - using iterator = typename concurrent_skip_list::iterator; - - range_type(range_type& r, split) : const_range_type(r, split()) {} - range_type(const concurrent_skip_list& l) : const_range_type(l) {} - - iterator begin() const { - node_ptr node = const_range_type::begin().my_node_ptr; - return iterator(node); - } - - iterator end() const { - node_ptr node = const_range_type::end().my_node_ptr; - return iterator(node); - } - }; // class range_type - - range_type range() { return range_type(*this); } - const_range_type range() const { return const_range_type(*this); } - -private: - node_ptr internal_begin() const { - node_ptr head = get_head(); - return head == nullptr ? head : head->next(0); - } - - void internal_move(concurrent_skip_list&& other) { - my_head_ptr.store(other.my_head_ptr.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_head_ptr.store(nullptr, std::memory_order_relaxed); - - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_size.store(0, std::memory_order_relaxed); - - my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_max_height.store(0, std::memory_order_relaxed); - } - - void internal_move_construct_with_allocator(concurrent_skip_list&& other, - /*is_always_equal = */std::true_type) { - internal_move(std::move(other)); - } - - void internal_move_construct_with_allocator(concurrent_skip_list&& other, - /*is_always_equal = */std::false_type) { - if (my_node_allocator == other.get_allocator()) { - internal_move(std::move(other)); - } else { - my_size.store(0, std::memory_order_relaxed); - my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); - internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); - } - } - - static const key_type& get_key( node_ptr n ) { - __TBB_ASSERT(n, nullptr); - return container_traits::get_key(static_cast<node_ptr>(n)->value()); - } - - template <typename K> - bool found( node_ptr node, const K& key ) const { - return node != nullptr && !my_compare(key, get_key(node)); - } - - template <typename K> - node_ptr internal_find(const K& key) const { - return allow_multimapping ? 
internal_find_multi(key) : internal_find_unique(key); - } - - template <typename K> - node_ptr internal_find_multi( const K& key ) const { - node_ptr prev = get_head(); - if (prev == nullptr) return nullptr; // If the head node is not allocated - exit - - node_ptr curr = nullptr; - node_ptr old_curr = curr; - - for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { - curr = internal_find_position(h - 1, prev, key, my_compare); - - if (curr != old_curr && found(curr, key)) { - return curr; - } - old_curr = curr; - } - return nullptr; - } - - template <typename K> - node_ptr internal_find_unique( const K& key ) const { - const_iterator it = lower_bound(key); - return (it == end() || my_compare(key, container_traits::get_key(*it))) ? nullptr : it.my_node_ptr; - } - - template <typename K> - size_type internal_count( const K& key ) const { - if (allow_multimapping) { - // TODO: reimplement without double traversal - std::pair<const_iterator, const_iterator> r = equal_range(key); - return std::distance(r.first, r.second); - } - return size_type(contains(key) ? 1 : 0); - } - - template <typename K> - std::pair<iterator, iterator> internal_equal_range(const K& key) const { - iterator lb = get_iterator(lower_bound(key)); - auto result = std::make_pair(lb, lb); - - // If the lower bound points to the node with the requested key - if (found(lb.my_node_ptr, key)) { - - if (!allow_multimapping) { - // For unique containers - move the second iterator forward and exit - ++result.second; - } else { - // For multi containers - find the upper bound starting from the lower bound - node_ptr prev = lb.my_node_ptr; - node_ptr curr = nullptr; - not_greater_compare cmp(my_compare); - - // Start from the lower bound of the range - for (size_type h = prev->height(); h > 0; --h) { - curr = prev->next(h - 1); - while (curr && cmp(get_key(curr), key)) { - prev = curr; - // If the height of the next node is greater than the current one - jump to its height - if (h < curr->height()) { - h = curr->height(); - } - curr = prev->next(h - 1); - } - } - result.second = iterator(curr); - } - } - - return result; - } - - // Finds position on the level using comparator cmp starting from the node prev - template <typename K, typename Comparator> - node_ptr internal_find_position( size_type level, node_ptr& prev, const K& key, - const Comparator& cmp ) const { - __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); - node_ptr curr = prev->next(level); - - while (curr && cmp(get_key(curr), key)) { - prev = curr; - __TBB_ASSERT(level < prev->height(), nullptr); - curr = prev->next(level); - } - - return curr; - } - - // The same as previous overload, but allows index_number comparison - template <typename Comparator> - node_ptr internal_find_position( size_type level, node_ptr& prev, node_ptr node, - const Comparator& cmp ) const { - __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); - node_ptr curr = prev->next(level); - - while (curr && cmp(get_key(curr), get_key(node))) { - if (allow_multimapping && cmp(get_key(node), get_key(curr)) && curr->index_number() > node->index_number()) { - break; - } - - prev = curr; - __TBB_ASSERT(level < prev->height(), nullptr); - curr = prev->next(level); - } - return curr; - } - - template <typename Comparator> - void fill_prev_curr_arrays(array_type& prev_nodes, array_type& curr_nodes, node_ptr node, const key_type& key, - const Comparator& cmp, node_ptr head ) { - - size_type curr_max_height = 
my_max_height.load(std::memory_order_acquire); - size_type node_height = node->height(); - if (curr_max_height < node_height) { - std::fill(prev_nodes.begin() + curr_max_height, prev_nodes.begin() + node_height, head); - std::fill(curr_nodes.begin() + curr_max_height, curr_nodes.begin() + node_height, nullptr); - } - - node_ptr prev = head; - for (size_type level = curr_max_height; level > 0; --level) { - node_ptr curr = internal_find_position(level - 1, prev, key, cmp); - prev_nodes[level - 1] = prev; - curr_nodes[level - 1] = curr; - } - } - - void fill_prev_array_for_existing_node( array_type& prev_nodes, node_ptr node ) { - node_ptr head = create_head_if_necessary(); - prev_nodes.fill(head); - - node_ptr prev = head; - for (size_type level = node->height(); level > 0; --level) { - while (prev->next(level - 1) != node) { - prev = prev->next(level - 1); - } - prev_nodes[level - 1] = prev; - } - } - - struct not_greater_compare { - const key_compare& my_less_compare; - - not_greater_compare( const key_compare& less_compare ) : my_less_compare(less_compare) {} - - template <typename K1, typename K2> - bool operator()( const K1& first, const K2& second ) const { - return !my_less_compare(second, first); - } - }; - - not_greater_compare select_comparator( /*allow_multimapping = */ std::true_type ) { - return not_greater_compare(my_compare); - } - - key_compare select_comparator( /*allow_multimapping = */ std::false_type ) { - return my_compare; - } - - template<typename... Args> - std::pair<iterator, bool> internal_insert( Args&&... args ) { - node_ptr new_node = create_value_node(std::forward<Args>(args)...); - std::pair<iterator, bool> insert_result = internal_insert_node(new_node); - if (!insert_result.second) { - delete_value_node(new_node); - } - return insert_result; - } - - std::pair<iterator, bool> internal_insert_node( node_ptr new_node ) { - array_type prev_nodes; - array_type curr_nodes; - size_type new_height = new_node->height(); - auto compare = select_comparator(std::integral_constant<bool, allow_multimapping>{}); - - node_ptr head_node = create_head_if_necessary(); - - for (;;) { - fill_prev_curr_arrays(prev_nodes, curr_nodes, new_node, get_key(new_node), compare, head_node); - - node_ptr prev = prev_nodes[0]; - node_ptr next = curr_nodes[0]; - - if (allow_multimapping) { - new_node->set_index_number(prev->index_number() + 1); - } else { - if (found(next, get_key(new_node))) { - return std::pair<iterator, bool>(iterator(next), false); - } - } - - new_node->set_next(0, next); - if (!prev->atomic_next(0).compare_exchange_strong(next, new_node)) { - continue; - } - - // If the node was successfully linked on the first level - it will be linked on other levels - // Insertion cannot fail starting from this point - - // If the height of inserted node is greater than maximum - increase maximum - size_type max_height = my_max_height.load(std::memory_order_acquire); - for (;;) { - if (new_height <= max_height || my_max_height.compare_exchange_strong(max_height, new_height)) { - // If the maximum was successfully updated by current thread - // or by an other thread for the value, greater or equal to new_height - break; - } - } - - for (std::size_t level = 1; level < new_height; ++level) { - // Link the node on upper levels - for (;;) { - prev = prev_nodes[level]; - next = static_cast<node_ptr>(curr_nodes[level]); - - new_node->set_next(level, next); - __TBB_ASSERT(new_node->height() > level, "Internal structure break"); - if (prev->atomic_next(level).compare_exchange_strong(next, 
new_node)) { - break; - } - - for (size_type lev = level; lev != new_height; ++lev ) { - curr_nodes[lev] = internal_find_position(lev, prev_nodes[lev], new_node, compare); - } - } - } - ++my_size; - return std::pair<iterator, bool>(iterator(new_node), true); - } - } - - template <typename K, typename Comparator> - node_ptr internal_get_bound( const K& key, const Comparator& cmp ) const { - node_ptr prev = get_head(); - if (prev == nullptr) return nullptr; // If the head node is not allocated - exit - - node_ptr curr = nullptr; - - for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { - curr = internal_find_position(h - 1, prev, key, cmp); - } - - return curr; - } - - template <typename K> - size_type internal_erase( const K& key ) { - auto eq = equal_range(key); - size_type old_size = size(); - unsafe_erase(eq.first, eq.second); - return old_size - size(); - } - - // Returns node_ptr to the extracted node and node_ptr to the next node after the extracted - std::pair<node_ptr, node_ptr> internal_extract( const_iterator it ) { - std::pair<node_ptr, node_ptr> result(nullptr, nullptr); - if ( it != end() ) { - array_type prev_nodes; - - node_ptr erase_node = it.my_node_ptr; - node_ptr next_node = erase_node->next(0); - fill_prev_array_for_existing_node(prev_nodes, erase_node); - - for (size_type level = 0; level < erase_node->height(); ++level) { - prev_nodes[level]->set_next(level, erase_node->next(level)); - erase_node->set_next(level, nullptr); - } - my_size.fetch_sub(1, std::memory_order_relaxed); - - result.first = erase_node; - result.second = next_node; - } - return result; - } - -protected: - template<typename SourceType> - void internal_merge( SourceType&& source ) { - using source_type = typename std::decay<SourceType>::type; - using source_iterator = typename source_type::iterator; - static_assert((std::is_same<node_type, typename source_type::node_type>::value), "Incompatible containers cannot be merged"); - - for (source_iterator it = source.begin(); it != source.end();) { - source_iterator where = it++; - if (allow_multimapping || !contains(container_traits::get_key(*where))) { - node_type handle = source.unsafe_extract(where); - __TBB_ASSERT(!handle.empty(), "Extracted handle in merge is empty"); - - if (!insert(std::move(handle)).second) { - //If the insertion fails - return the node into source - source.insert(std::move(handle)); - } - __TBB_ASSERT(handle.empty(), "Node handle should be empty after the insertion"); - } - } - } - -private: - void internal_copy( const concurrent_skip_list& other ) { - internal_copy(other.begin(), other.end()); - } - - template<typename Iterator> - void internal_copy( Iterator first, Iterator last ) { - try_call([&] { - for (auto it = first; it != last; ++it) { - insert(*it); - } - }).on_exception([&] { - clear(); - node_ptr head = my_head_ptr.load(std::memory_order_relaxed); - if (head != nullptr) { - delete_node(head); - } - }); - } - - static size_type calc_node_size( size_type height ) { - static_assert(alignof(list_node_type) >= alignof(typename list_node_type::atomic_node_ptr), "Incorrect alignment"); - return sizeof(list_node_type) + height * sizeof(typename list_node_type::atomic_node_ptr); - } - - node_ptr create_node( size_type height ) { - size_type sz = calc_node_size(height); - node_ptr node = reinterpret_cast<node_ptr>(node_allocator_traits::allocate(my_node_allocator, sz)); - node_allocator_traits::construct(my_node_allocator, node, height, my_node_allocator); - return node; - } - - template <typename... 
Args> - node_ptr create_value_node( Args&&... args ) { - node_ptr node = create_node(my_rng()); - - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard([&] { - delete_node(node); - }); - - // Construct the value inside the node - node_allocator_traits::construct(my_node_allocator, node->storage(), std::forward<Args>(args)...); - value_guard.dismiss(); - return node; - } - - node_ptr create_head_node() { - return create_node(max_level); - } - - void delete_node( node_ptr node ) { - size_type sz = calc_node_size(node->height()); - - // Destroy the node - node_allocator_traits::destroy(my_node_allocator, node); - // Deallocate the node - node_allocator_traits::deallocate(my_node_allocator, reinterpret_cast<std::uint8_t*>(node), sz); - } - - void delete_value_node( node_ptr node ) { - // Destroy the value inside the node - node_allocator_traits::destroy(my_node_allocator, node->storage()); - delete_node(node); - } - - node_ptr get_head() const { - return my_head_ptr.load(std::memory_order_acquire); - } - - node_ptr create_head_if_necessary() { - node_ptr current_head = get_head(); - if (current_head == nullptr) { - // Head node was not created - create it - node_ptr new_head = create_head_node(); - if (my_head_ptr.compare_exchange_strong(current_head, new_head)) { - current_head = new_head; - } else { - // If an other thread has already created the head node - destroy new_head - // current_head now points to the actual head node - delete_node(new_head); - } - } - __TBB_ASSERT(my_head_ptr.load(std::memory_order_relaxed) != nullptr, nullptr); - __TBB_ASSERT(current_head != nullptr, nullptr); - return current_head; - } - - static iterator get_iterator( const_iterator it ) { - return iterator(it.my_node_ptr); - } - - void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::true_type ) { - internal_move(std::move(other)); - } - - void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::false_type ) { - if (my_node_allocator == other.my_node_allocator) { - internal_move(std::move(other)); - } else { - internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); - } - } - - void internal_swap_fields( concurrent_skip_list& other ) { - using std::swap; - swap_allocators(my_node_allocator, other.my_node_allocator); - swap(my_compare, other.my_compare); - swap(my_rng, other.my_rng); - - swap_atomics_relaxed(my_head_ptr, other.my_head_ptr); - swap_atomics_relaxed(my_size, other.my_size); - swap_atomics_relaxed(my_max_height, other.my_max_height); - } - - void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::true_type ) { - internal_swap_fields(other); - } - - void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::false_type ) { - __TBB_ASSERT(my_node_allocator == other.my_node_allocator, "Swapping with unequal allocators is not allowed"); - internal_swap_fields(other); - } - - node_allocator_type my_node_allocator; - key_compare my_compare; - random_level_generator_type my_rng; - std::atomic<list_node_type*> my_head_ptr; - std::atomic<size_type> my_size; - std::atomic<size_type> my_max_height; - - template<typename OtherTraits> - friend class concurrent_skip_list; -}; // class concurrent_skip_list - -template <typename Traits> -bool operator==( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - if (lhs.size() != rhs.size()) return false; 
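The insertion path shown above links a new node into level 0 with a single compare-and-swap and, on contention, re-searches the position and retries before linking the upper levels. Below is a minimal sketch of just that level-0 linking step, reduced to a plain singly linked list; demo_node and link_after are hypothetical names used only for this illustration and are not part of the patch.

#include <atomic>

struct demo_node {
    int value;
    std::atomic<demo_node*> next{nullptr};
};

// Links `n` right after `prev`. If another thread linked a node in the
// meantime, the CAS fails, `expected` is refreshed, and the attempt repeats -
// the same retry shape internal_insert_node uses at level 0 (the skip list
// additionally re-searches the insertion position before retrying).
void link_after(demo_node* prev, demo_node* n) {
    demo_node* expected = prev->next.load(std::memory_order_acquire);
    do {
        n->next.store(expected, std::memory_order_relaxed);
    } while (!prev->next.compare_exchange_strong(expected, n));
}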
-#if _MSC_VER - // Passing "unchecked" iterators to std::equal with 3 parameters - // causes compiler warnings. - // The workaround is to use overload with 4 parameters, which is - // available since C++14 - minimally supported version on MSVC - return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); -#else - return std::equal(lhs.begin(), lhs.end(), rhs.begin()); -#endif -} - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template <typename Traits> -bool operator!=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return !(lhs == rhs); -} -#endif - -#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT -template <typename Traits> -tbb::detail::synthesized_three_way_result<typename Traits::value_type> -operator<=>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), - rhs.begin(), rhs.end(), - tbb::detail::synthesized_three_way_comparator{}); -} -#else -template <typename Traits> -bool operator<( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); -} - -template <typename Traits> -bool operator>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return rhs < lhs; -} - -template <typename Traits> -bool operator<=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return !(rhs < lhs); -} - -template <typename Traits> -bool operator>=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return !(lhs < rhs); -} -#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT - -// Generates a number from the interval [0, MaxLevel). -template <std::size_t MaxLevel> -class concurrent_geometric_level_generator { -public: - static constexpr std::size_t max_level = MaxLevel; - // TODO: modify the algorithm to accept other values of max_level - static_assert(max_level == 32, "Incompatible max_level for rng"); - - concurrent_geometric_level_generator() : engines(std::minstd_rand::result_type(time(nullptr))) {} - - std::size_t operator()() { - // +1 is required to pass at least 1 into log2 (log2(0) is undefined) - // -1 is required to have an ability to return 0 from the generator (max_level - log2(2^31) - 1) - std::size_t result = max_level - std::size_t(tbb::detail::log2(engines.local()() + 1)) - 1; - __TBB_ASSERT(result <= max_level, nullptr); - return result; - } - -private: - tbb::enumerable_thread_specific<std::minstd_rand> engines; -}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(pop) // warning 4127 is back -#endif - -#endif // __TBB_detail__concurrent_skip_list_H +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__concurrent_skip_list_H +#define __TBB_detail__concurrent_skip_list_H + +#if !defined(__TBB_concurrent_map_H) && !defined(__TBB_concurrent_set_H) +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_config.h" +#include "_range_common.h" +#include "_allocator_traits.h" +#include "_template_helpers.h" +#include "_node_handle.h" +#include "_containers_helpers.h" +#include "_assert.h" +#include "_exception.h" +#include "../enumerable_thread_specific.h" +#include <utility> +#include <initializer_list> +#include <atomic> +#include <array> +#include <type_traits> +#include <random> // Need std::geometric_distribution +#include <algorithm> // Need std::equal and std::lexicographical_compare +#include <cstdint> +#if __TBB_CPP20_COMPARISONS_PRESENT +#include <compare> +#endif + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Value, typename Allocator> +class skip_list_node { + using node_ptr = skip_list_node*; +public: + using value_type = Value; + using atomic_node_ptr = std::atomic<node_ptr>; + using size_type = std::size_t; + using container_allocator_type = Allocator; + + using reference = value_type&; + using const_reference = const value_type&; +private: + using allocator_traits = tbb::detail::allocator_traits<container_allocator_type>; + + // Allocator is the same as the container allocator=> allocates unitptr_t + // It is required to rebind it to value_type to get the correct pointer and const_pointer + using value_allocator_traits = typename allocator_traits::template rebind_traits<value_type>; +public: + using pointer = typename value_allocator_traits::pointer; + using const_pointer = typename value_allocator_traits::const_pointer; + + skip_list_node( size_type levels, container_allocator_type& alloc ) + : my_container_allocator(alloc), my_height(levels), my_index_number(0) + { + for (size_type l = 0; l < my_height; ++l) { + allocator_traits::construct(my_container_allocator, &get_atomic_next(l), nullptr); + } + } + + ~skip_list_node() { + for (size_type l = 0; l < my_height; ++l) { + allocator_traits::destroy(my_container_allocator, &get_atomic_next(l)); + } + } + + skip_list_node( const skip_list_node& ) = delete; + skip_list_node( skip_list_node&& ) = delete; + skip_list_node& operator=( const skip_list_node& ) = delete; + skip_list_node& operator=( skip_list_node&& ) = delete; + + pointer storage() { + return &my_value; + } + + reference value() { + return *storage(); + } + + node_ptr next( size_type level ) const { + node_ptr res = get_atomic_next(level).load(std::memory_order_acquire); + __TBB_ASSERT(res == nullptr || res->height() > level, "Broken internal structure"); + return res; + } + + atomic_node_ptr& atomic_next( size_type level ) { + atomic_node_ptr& res = get_atomic_next(level); +#if TBB_USE_DEBUG + node_ptr node = res.load(std::memory_order_acquire); + __TBB_ASSERT(node == nullptr || node->height() > level, "Broken internal structure"); +#endif + return res; + } + + void set_next( size_type level, node_ptr n ) { + __TBB_ASSERT(n == nullptr || n->height() > level, "Broken internal structure"); + get_atomic_next(level).store(n, std::memory_order_relaxed); + } + + size_type height() const { + return my_height; + } + + void set_index_number( size_type index_num ) { + my_index_number = index_num; + } + 
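skip_list_node reads its per-level links with memory_order_acquire and relies on the inserting thread publishing a node only after its value is fully constructed (via the compare-and-swap done through atomic_next). A minimal sketch of that publish/acquire pairing, using hypothetical names (payload, g_slot) that are not part of the patch:

#include <atomic>
#include <cassert>

struct payload { int value = 42; };

std::atomic<payload*> g_slot{nullptr};

void publisher() {
    payload* p = new payload{};                   // fully construct first
    payload* expected = nullptr;
    g_slot.compare_exchange_strong(expected, p);  // then publish (seq_cst, i.e. at least release)
}

void reader() {
    if (payload* p = g_slot.load(std::memory_order_acquire)) {
        assert(p->value == 42);                   // the constructed value is guaranteed visible
    }
}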
+ size_type index_number() const { + return my_index_number; + } + +private: + atomic_node_ptr& get_atomic_next( size_type level ) { + atomic_node_ptr* arr = reinterpret_cast<atomic_node_ptr*>(this + 1); + return arr[level]; + } + + const atomic_node_ptr& get_atomic_next( size_type level ) const { + const atomic_node_ptr* arr = reinterpret_cast<const atomic_node_ptr*>(this + 1); + return arr[level]; + } + + container_allocator_type& my_container_allocator; + union { + value_type my_value; + }; + size_type my_height; + size_type my_index_number; +}; // class skip_list_node + +template <typename NodeType, typename ValueType> +class skip_list_iterator { + using node_type = NodeType; + using node_ptr = node_type*; +public: + using iterator_category = std::forward_iterator_tag; + using value_type = ValueType; + + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + skip_list_iterator() : skip_list_iterator(nullptr) {} + + skip_list_iterator( const skip_list_iterator<node_type, typename node_type::value_type>& other ) + : my_node_ptr(other.my_node_ptr) {} + + skip_list_iterator& operator=( const skip_list_iterator<node_type, typename node_type::value_type>& other ) { + my_node_ptr = other.my_node_ptr; + return *this; + } + + reference operator*() const { return my_node_ptr->value(); } + pointer operator->() const { return my_node_ptr->storage(); } + + skip_list_iterator& operator++() { + __TBB_ASSERT(my_node_ptr != nullptr, nullptr); + my_node_ptr = my_node_ptr->next(0); + return *this; + } + + skip_list_iterator operator++(int) { + skip_list_iterator tmp = *this; + ++*this; + return tmp; + } + +private: + skip_list_iterator(node_type* n) : my_node_ptr(n) {} + + node_ptr my_node_ptr; + + template <typename Traits> + friend class concurrent_skip_list; + + template <typename N, typename V> + friend class skip_list_iterator; + + friend class const_range; + friend class range; + + friend bool operator==( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { + return lhs.my_node_ptr == rhs.my_node_ptr; + } + + friend bool operator!=( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { + return lhs.my_node_ptr != rhs.my_node_ptr; + } +}; // class skip_list_iterator + +template <typename Traits> +class concurrent_skip_list { +protected: + using container_traits = Traits; + using self_type = concurrent_skip_list<container_traits>; + using allocator_type = typename container_traits::allocator_type; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using key_compare = typename container_traits::compare_type; + using value_compare = typename container_traits::value_compare; + using key_type = typename container_traits::key_type; + using value_type = typename container_traits::value_type; + static_assert(std::is_same<value_type, typename allocator_type::value_type>::value, + "value_type of the container should be the same as its allocator"); + + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + static constexpr size_type max_level = container_traits::max_level; + + using node_allocator_type = typename allocator_traits_type::template rebind_alloc<std::uint8_t>; + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + + using list_node_type = skip_list_node<value_type, node_allocator_type>; + using node_type = node_handle<key_type, value_type, list_node_type, allocator_type>; + + using iterator = skip_list_iterator<list_node_type, value_type>; + 
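get_atomic_next() above treats the bytes immediately past the node object as the array of per-level links, and calc_node_size() later in the header sizes the allocation as sizeof(node) plus height atomic pointers. A compressed sketch of that layout, assuming pointer-compatible alignment (which the header itself static_asserts); tower_node and make_tower are illustrative names only, and a real implementation would also destroy the atomics and free the block, as delete_node does.

#include <atomic>
#include <cstddef>
#include <new>

struct tower_node {
    std::size_t height;

    std::atomic<tower_node*>* links() {
        // The per-level links live immediately after the object in the same allocation.
        return reinterpret_cast<std::atomic<tower_node*>*>(this + 1);
    }
};

tower_node* make_tower(std::size_t height) {
    // One raw block: the node header followed by `height` atomic pointers.
    std::size_t bytes = sizeof(tower_node) + height * sizeof(std::atomic<tower_node*>);
    void* raw = ::operator new(bytes);
    tower_node* node = new (raw) tower_node{height};
    for (std::size_t level = 0; level < height; ++level) {
        new (&node->links()[level]) std::atomic<tower_node*>(nullptr);
    }
    return node;
}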
using const_iterator = skip_list_iterator<list_node_type, const value_type>; + + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using random_level_generator_type = typename container_traits::random_level_generator_type; + + using node_ptr = list_node_type*; + + using array_type = std::array<node_ptr, max_level>; +private: + template <typename T> + using is_transparent = dependent_bool<comp_is_transparent<key_compare>, T>; +public: + static constexpr bool allow_multimapping = container_traits::allow_multimapping; + + concurrent_skip_list() : my_head_ptr(nullptr), my_size(0), my_max_height(0) {} + + explicit concurrent_skip_list( const key_compare& comp, const allocator_type& alloc = allocator_type() ) + : my_node_allocator(alloc), my_compare(comp), my_head_ptr(nullptr), my_size(0), my_max_height(0) {} + + explicit concurrent_skip_list( const allocator_type& alloc ) + : concurrent_skip_list(key_compare(), alloc) {} + + template<typename InputIterator> + concurrent_skip_list( InputIterator first, InputIterator last, const key_compare& comp = key_compare(), + const allocator_type& alloc = allocator_type() ) + : concurrent_skip_list(comp, alloc) + { + internal_copy(first, last); + } + + template <typename InputIterator> + concurrent_skip_list( InputIterator first, InputIterator last, const allocator_type& alloc ) + : concurrent_skip_list(first, last, key_compare(), alloc) {} + + concurrent_skip_list( std::initializer_list<value_type> init, const key_compare& comp = key_compare(), + const allocator_type& alloc = allocator_type() ) + : concurrent_skip_list(init.begin(), init.end(), comp, alloc) {} + + concurrent_skip_list( std::initializer_list<value_type> init, const allocator_type& alloc ) + : concurrent_skip_list(init, key_compare(), alloc) {} + + concurrent_skip_list( const concurrent_skip_list& other ) + : my_node_allocator(node_allocator_traits::select_on_container_copy_construction(other.get_allocator())), + my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), + my_size(0), my_max_height(0) + { + internal_copy(other); + __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); + } + + concurrent_skip_list( const concurrent_skip_list& other, const allocator_type& alloc ) + : my_node_allocator(alloc), my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), + my_size(0), my_max_height(0) + { + internal_copy(other); + __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); + } + + concurrent_skip_list( concurrent_skip_list&& other ) + : my_node_allocator(std::move(other.my_node_allocator)), my_compare(other.my_compare), + my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) // my_head_ptr would be stored in internal_move + { + internal_move(std::move(other)); + } + + concurrent_skip_list( concurrent_skip_list&& other, const allocator_type& alloc ) + : my_node_allocator(alloc), my_compare(other.my_compare), + my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) + { + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_construct_with_allocator(std::move(other), is_always_equal()); + } + + ~concurrent_skip_list() { + clear(); + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + if (head != nullptr) { + delete_node(head); + } + } + + concurrent_skip_list& operator=( const concurrent_skip_list& 
other ) { + if (this != &other) { + clear(); + copy_assign_allocators(my_node_allocator, other.my_node_allocator); + my_compare = other.my_compare; + my_rng = other.my_rng; + internal_copy(other); + } + return *this; + } + + concurrent_skip_list& operator=( concurrent_skip_list&& other ) { + if (this != &other) { + clear(); + my_compare = std::move(other.my_compare); + my_rng = std::move(other.my_rng); + + move_assign_allocators(my_node_allocator, other.my_node_allocator); + using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; + using is_always_equal = typename node_allocator_traits::is_always_equal; + internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); + } + return *this; + } + + concurrent_skip_list& operator=( std::initializer_list<value_type> il ) + { + clear(); + insert(il.begin(),il.end()); + return *this; + } + + std::pair<iterator, bool> insert( const value_type& value ) { + return internal_insert(value); + } + + std::pair<iterator, bool> insert( value_type&& value ) { + return internal_insert(std::move(value)); + } + + iterator insert( const_iterator, const_reference value ) { + // Ignore hint + return insert(value).first; + } + + iterator insert( const_iterator, value_type&& value ) { + // Ignore hint + return insert(std::move(value)).first; + } + + template<typename InputIterator> + void insert( InputIterator first, InputIterator last ) { + while (first != last) { + insert(*first); + ++first; + } + } + + void insert( std::initializer_list<value_type> init ) { + insert(init.begin(), init.end()); + } + + std::pair<iterator, bool> insert( node_type&& nh ) { + if (!nh.empty()) { + auto insert_node = node_handle_accessor::get_node_ptr(nh); + std::pair<iterator, bool> insert_result = internal_insert_node(insert_node); + if (insert_result.second) { + node_handle_accessor::deactivate(nh); + } + return insert_result; + } + return std::pair<iterator, bool>(end(), false); + } + + iterator insert( const_iterator, node_type&& nh ) { + // Ignore hint + return insert(std::move(nh)).first; + } + + template<typename... Args> + std::pair<iterator, bool> emplace( Args&&... args ) { + return internal_insert(std::forward<Args>(args)...); + } + + template<typename... Args> + iterator emplace_hint( const_iterator, Args&&... args ) { + // Ignore hint + return emplace(std::forward<Args>(args)...).first; + } + + iterator unsafe_erase( iterator pos ) { + std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); + if (extract_result.first) { // node was extracted + delete_value_node(extract_result.first); + return extract_result.second; + } + return end(); + } + + iterator unsafe_erase( const_iterator pos ) { + return unsafe_erase(get_iterator(pos)); + } + + iterator unsafe_erase( const_iterator first, const_iterator last ) { + while (first != last) { + // Unsafe erase returns the iterator which follows the erased one + first = unsafe_erase(first); + } + return get_iterator(first); + } + + size_type unsafe_erase( const key_type& key ) { + return internal_erase(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + size_type>::type unsafe_erase( const K& key ) + { + return internal_erase(key); + } + + node_type unsafe_extract( const_iterator pos ) { + std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); + return extract_result.first ? 
node_handle_accessor::construct<node_type>(extract_result.first) : node_type(); + } + + node_type unsafe_extract( iterator pos ) { + return unsafe_extract(const_iterator(pos)); + } + + node_type unsafe_extract( const key_type& key ) { + return unsafe_extract(find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + node_type>::type unsafe_extract( const K& key ) + { + return unsafe_extract(find(key)); + } + + iterator lower_bound( const key_type& key ) { + return iterator(internal_get_bound(key, my_compare)); + } + + const_iterator lower_bound( const key_type& key ) const { + return const_iterator(internal_get_bound(key, my_compare)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type lower_bound( const K& key ) { + return iterator(internal_get_bound(key, my_compare)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type lower_bound( const K& key ) const { + return const_iterator(internal_get_bound(key, my_compare)); + } + + iterator upper_bound( const key_type& key ) { + return iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + const_iterator upper_bound( const key_type& key ) const { + return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type upper_bound( const K& key ) { + return iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type upper_bound( const K& key ) const { + return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + iterator find( const key_type& key ) { + return iterator(internal_find(key)); + } + + const_iterator find( const key_type& key ) const { + return const_iterator(internal_find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { + return iterator(internal_find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { + return const_iterator(internal_find(key)); + } + + size_type count( const key_type& key ) const { + return internal_count(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { + return internal_count(key); + } + + bool contains( const key_type& key ) const { + return find(key) != end(); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { + return find(key) != end(); + } + + void clear() noexcept { + // clear is not thread safe - load can be relaxed + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + + if (head == nullptr) return; // Head is not allocated => container is empty + + node_ptr current = head->next(0); + + // Delete all value nodes in the container + while (current) { + node_ptr next = current->next(0); + delete_value_node(current); + current = next; + } + + for (size_type level = 0; level < head->height(); ++level) { + head->set_next(level, nullptr); + } + + my_size.store(0, std::memory_order_relaxed); + my_max_height.store(0, std::memory_order_relaxed); + } + + iterator begin() { + return 
iterator(internal_begin()); + } + + const_iterator begin() const { + return const_iterator(internal_begin()); + } + + const_iterator cbegin() const { + return const_iterator(internal_begin()); + } + + iterator end() { + return iterator(nullptr); + } + + const_iterator end() const { + return const_iterator(nullptr); + } + + const_iterator cend() const { + return const_iterator(nullptr); + } + + size_type size() const { + return my_size.load(std::memory_order_relaxed); + } + + size_type max_size() const { + return node_allocator_traits::max_size(my_node_allocator); + } + + __TBB_nodiscard bool empty() const { + return 0 == size(); + } + + allocator_type get_allocator() const { + return my_node_allocator; + } + + void swap(concurrent_skip_list& other) { + if (this != &other) { + using pocs_type = typename node_allocator_traits::propagate_on_container_swap; + using is_always_equal = typename node_allocator_traits::is_always_equal; + internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); + } + } + + std::pair<iterator, iterator> equal_range(const key_type& key) { + return internal_equal_range(key); + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { + return internal_equal_range(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { + return internal_equal_range(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { + return internal_equal_range(key); + } + + key_compare key_comp() const { return my_compare; } + + value_compare value_comp() const { return container_traits::value_comp(my_compare); } + + class const_range_type { + public: + using size_type = typename concurrent_skip_list::size_type; + using value_type = typename concurrent_skip_list::value_type; + using iterator = typename concurrent_skip_list::const_iterator; + + bool empty() const { + return my_begin.my_node_ptr->next(0) == my_end.my_node_ptr; + } + + bool is_divisible() const { + return my_level != 0 ? 
my_begin.my_node_ptr->next(my_level - 1) != my_end.my_node_ptr : false; + } + + size_type size() const { return std::distance(my_begin, my_end); } + + const_range_type( const_range_type& r, split) + : my_end(r.my_end) { + my_begin = iterator(r.my_begin.my_node_ptr->next(r.my_level - 1)); + my_level = my_begin.my_node_ptr->height(); + r.my_end = my_begin; + } + + const_range_type( const concurrent_skip_list& l) + : my_end(l.end()), my_begin(l.begin()), my_level(my_begin.my_node_ptr->height() ) {} + + iterator begin() const { return my_begin; } + iterator end() const { return my_end; } + size_type grainsize() const { return 1; } + + private: + const_iterator my_end; + const_iterator my_begin; + size_type my_level; + }; // class const_range_type + + class range_type : public const_range_type { + public: + using iterator = typename concurrent_skip_list::iterator; + + range_type(range_type& r, split) : const_range_type(r, split()) {} + range_type(const concurrent_skip_list& l) : const_range_type(l) {} + + iterator begin() const { + node_ptr node = const_range_type::begin().my_node_ptr; + return iterator(node); + } + + iterator end() const { + node_ptr node = const_range_type::end().my_node_ptr; + return iterator(node); + } + }; // class range_type + + range_type range() { return range_type(*this); } + const_range_type range() const { return const_range_type(*this); } + +private: + node_ptr internal_begin() const { + node_ptr head = get_head(); + return head == nullptr ? head : head->next(0); + } + + void internal_move(concurrent_skip_list&& other) { + my_head_ptr.store(other.my_head_ptr.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_head_ptr.store(nullptr, std::memory_order_relaxed); + + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(0, std::memory_order_relaxed); + + my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_max_height.store(0, std::memory_order_relaxed); + } + + void internal_move_construct_with_allocator(concurrent_skip_list&& other, + /*is_always_equal = */std::true_type) { + internal_move(std::move(other)); + } + + void internal_move_construct_with_allocator(concurrent_skip_list&& other, + /*is_always_equal = */std::false_type) { + if (my_node_allocator == other.get_allocator()) { + internal_move(std::move(other)); + } else { + my_size.store(0, std::memory_order_relaxed); + my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); + } + } + + static const key_type& get_key( node_ptr n ) { + __TBB_ASSERT(n, nullptr); + return container_traits::get_key(static_cast<node_ptr>(n)->value()); + } + + template <typename K> + bool found( node_ptr node, const K& key ) const { + return node != nullptr && !my_compare(key, get_key(node)); + } + + template <typename K> + node_ptr internal_find(const K& key) const { + return allow_multimapping ? 
internal_find_multi(key) : internal_find_unique(key); + } + + template <typename K> + node_ptr internal_find_multi( const K& key ) const { + node_ptr prev = get_head(); + if (prev == nullptr) return nullptr; // If the head node is not allocated - exit + + node_ptr curr = nullptr; + node_ptr old_curr = curr; + + for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { + curr = internal_find_position(h - 1, prev, key, my_compare); + + if (curr != old_curr && found(curr, key)) { + return curr; + } + old_curr = curr; + } + return nullptr; + } + + template <typename K> + node_ptr internal_find_unique( const K& key ) const { + const_iterator it = lower_bound(key); + return (it == end() || my_compare(key, container_traits::get_key(*it))) ? nullptr : it.my_node_ptr; + } + + template <typename K> + size_type internal_count( const K& key ) const { + if (allow_multimapping) { + // TODO: reimplement without double traversal + std::pair<const_iterator, const_iterator> r = equal_range(key); + return std::distance(r.first, r.second); + } + return size_type(contains(key) ? 1 : 0); + } + + template <typename K> + std::pair<iterator, iterator> internal_equal_range(const K& key) const { + iterator lb = get_iterator(lower_bound(key)); + auto result = std::make_pair(lb, lb); + + // If the lower bound points to the node with the requested key + if (found(lb.my_node_ptr, key)) { + + if (!allow_multimapping) { + // For unique containers - move the second iterator forward and exit + ++result.second; + } else { + // For multi containers - find the upper bound starting from the lower bound + node_ptr prev = lb.my_node_ptr; + node_ptr curr = nullptr; + not_greater_compare cmp(my_compare); + + // Start from the lower bound of the range + for (size_type h = prev->height(); h > 0; --h) { + curr = prev->next(h - 1); + while (curr && cmp(get_key(curr), key)) { + prev = curr; + // If the height of the next node is greater than the current one - jump to its height + if (h < curr->height()) { + h = curr->height(); + } + curr = prev->next(h - 1); + } + } + result.second = iterator(curr); + } + } + + return result; + } + + // Finds position on the level using comparator cmp starting from the node prev + template <typename K, typename Comparator> + node_ptr internal_find_position( size_type level, node_ptr& prev, const K& key, + const Comparator& cmp ) const { + __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); + node_ptr curr = prev->next(level); + + while (curr && cmp(get_key(curr), key)) { + prev = curr; + __TBB_ASSERT(level < prev->height(), nullptr); + curr = prev->next(level); + } + + return curr; + } + + // The same as previous overload, but allows index_number comparison + template <typename Comparator> + node_ptr internal_find_position( size_type level, node_ptr& prev, node_ptr node, + const Comparator& cmp ) const { + __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); + node_ptr curr = prev->next(level); + + while (curr && cmp(get_key(curr), get_key(node))) { + if (allow_multimapping && cmp(get_key(node), get_key(curr)) && curr->index_number() > node->index_number()) { + break; + } + + prev = curr; + __TBB_ASSERT(level < prev->height(), nullptr); + curr = prev->next(level); + } + return curr; + } + + template <typename Comparator> + void fill_prev_curr_arrays(array_type& prev_nodes, array_type& curr_nodes, node_ptr node, const key_type& key, + const Comparator& cmp, node_ptr head ) { + + size_type curr_max_height = 
my_max_height.load(std::memory_order_acquire); + size_type node_height = node->height(); + if (curr_max_height < node_height) { + std::fill(prev_nodes.begin() + curr_max_height, prev_nodes.begin() + node_height, head); + std::fill(curr_nodes.begin() + curr_max_height, curr_nodes.begin() + node_height, nullptr); + } + + node_ptr prev = head; + for (size_type level = curr_max_height; level > 0; --level) { + node_ptr curr = internal_find_position(level - 1, prev, key, cmp); + prev_nodes[level - 1] = prev; + curr_nodes[level - 1] = curr; + } + } + + void fill_prev_array_for_existing_node( array_type& prev_nodes, node_ptr node ) { + node_ptr head = create_head_if_necessary(); + prev_nodes.fill(head); + + node_ptr prev = head; + for (size_type level = node->height(); level > 0; --level) { + while (prev->next(level - 1) != node) { + prev = prev->next(level - 1); + } + prev_nodes[level - 1] = prev; + } + } + + struct not_greater_compare { + const key_compare& my_less_compare; + + not_greater_compare( const key_compare& less_compare ) : my_less_compare(less_compare) {} + + template <typename K1, typename K2> + bool operator()( const K1& first, const K2& second ) const { + return !my_less_compare(second, first); + } + }; + + not_greater_compare select_comparator( /*allow_multimapping = */ std::true_type ) { + return not_greater_compare(my_compare); + } + + key_compare select_comparator( /*allow_multimapping = */ std::false_type ) { + return my_compare; + } + + template<typename... Args> + std::pair<iterator, bool> internal_insert( Args&&... args ) { + node_ptr new_node = create_value_node(std::forward<Args>(args)...); + std::pair<iterator, bool> insert_result = internal_insert_node(new_node); + if (!insert_result.second) { + delete_value_node(new_node); + } + return insert_result; + } + + std::pair<iterator, bool> internal_insert_node( node_ptr new_node ) { + array_type prev_nodes; + array_type curr_nodes; + size_type new_height = new_node->height(); + auto compare = select_comparator(std::integral_constant<bool, allow_multimapping>{}); + + node_ptr head_node = create_head_if_necessary(); + + for (;;) { + fill_prev_curr_arrays(prev_nodes, curr_nodes, new_node, get_key(new_node), compare, head_node); + + node_ptr prev = prev_nodes[0]; + node_ptr next = curr_nodes[0]; + + if (allow_multimapping) { + new_node->set_index_number(prev->index_number() + 1); + } else { + if (found(next, get_key(new_node))) { + return std::pair<iterator, bool>(iterator(next), false); + } + } + + new_node->set_next(0, next); + if (!prev->atomic_next(0).compare_exchange_strong(next, new_node)) { + continue; + } + + // If the node was successfully linked on the first level - it will be linked on other levels + // Insertion cannot fail starting from this point + + // If the height of inserted node is greater than maximum - increase maximum + size_type max_height = my_max_height.load(std::memory_order_acquire); + for (;;) { + if (new_height <= max_height || my_max_height.compare_exchange_strong(max_height, new_height)) { + // If the maximum was successfully updated by current thread + // or by an other thread for the value, greater or equal to new_height + break; + } + } + + for (std::size_t level = 1; level < new_height; ++level) { + // Link the node on upper levels + for (;;) { + prev = prev_nodes[level]; + next = static_cast<node_ptr>(curr_nodes[level]); + + new_node->set_next(level, next); + __TBB_ASSERT(new_node->height() > level, "Internal structure break"); + if (prev->atomic_next(level).compare_exchange_strong(next, 
new_node)) { + break; + } + + for (size_type lev = level; lev != new_height; ++lev ) { + curr_nodes[lev] = internal_find_position(lev, prev_nodes[lev], new_node, compare); + } + } + } + ++my_size; + return std::pair<iterator, bool>(iterator(new_node), true); + } + } + + template <typename K, typename Comparator> + node_ptr internal_get_bound( const K& key, const Comparator& cmp ) const { + node_ptr prev = get_head(); + if (prev == nullptr) return nullptr; // If the head node is not allocated - exit + + node_ptr curr = nullptr; + + for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { + curr = internal_find_position(h - 1, prev, key, cmp); + } + + return curr; + } + + template <typename K> + size_type internal_erase( const K& key ) { + auto eq = equal_range(key); + size_type old_size = size(); + unsafe_erase(eq.first, eq.second); + return old_size - size(); + } + + // Returns node_ptr to the extracted node and node_ptr to the next node after the extracted + std::pair<node_ptr, node_ptr> internal_extract( const_iterator it ) { + std::pair<node_ptr, node_ptr> result(nullptr, nullptr); + if ( it != end() ) { + array_type prev_nodes; + + node_ptr erase_node = it.my_node_ptr; + node_ptr next_node = erase_node->next(0); + fill_prev_array_for_existing_node(prev_nodes, erase_node); + + for (size_type level = 0; level < erase_node->height(); ++level) { + prev_nodes[level]->set_next(level, erase_node->next(level)); + erase_node->set_next(level, nullptr); + } + my_size.fetch_sub(1, std::memory_order_relaxed); + + result.first = erase_node; + result.second = next_node; + } + return result; + } + +protected: + template<typename SourceType> + void internal_merge( SourceType&& source ) { + using source_type = typename std::decay<SourceType>::type; + using source_iterator = typename source_type::iterator; + static_assert((std::is_same<node_type, typename source_type::node_type>::value), "Incompatible containers cannot be merged"); + + for (source_iterator it = source.begin(); it != source.end();) { + source_iterator where = it++; + if (allow_multimapping || !contains(container_traits::get_key(*where))) { + node_type handle = source.unsafe_extract(where); + __TBB_ASSERT(!handle.empty(), "Extracted handle in merge is empty"); + + if (!insert(std::move(handle)).second) { + //If the insertion fails - return the node into source + source.insert(std::move(handle)); + } + __TBB_ASSERT(handle.empty(), "Node handle should be empty after the insertion"); + } + } + } + +private: + void internal_copy( const concurrent_skip_list& other ) { + internal_copy(other.begin(), other.end()); + } + + template<typename Iterator> + void internal_copy( Iterator first, Iterator last ) { + try_call([&] { + for (auto it = first; it != last; ++it) { + insert(*it); + } + }).on_exception([&] { + clear(); + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + if (head != nullptr) { + delete_node(head); + } + }); + } + + static size_type calc_node_size( size_type height ) { + static_assert(alignof(list_node_type) >= alignof(typename list_node_type::atomic_node_ptr), "Incorrect alignment"); + return sizeof(list_node_type) + height * sizeof(typename list_node_type::atomic_node_ptr); + } + + node_ptr create_node( size_type height ) { + size_type sz = calc_node_size(height); + node_ptr node = reinterpret_cast<node_ptr>(node_allocator_traits::allocate(my_node_allocator, sz)); + node_allocator_traits::construct(my_node_allocator, node, height, my_node_allocator); + return node; + } + + template <typename... 
Args> + node_ptr create_value_node( Args&&... args ) { + node_ptr node = create_node(my_rng()); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + delete_node(node); + }); + + // Construct the value inside the node + node_allocator_traits::construct(my_node_allocator, node->storage(), std::forward<Args>(args)...); + value_guard.dismiss(); + return node; + } + + node_ptr create_head_node() { + return create_node(max_level); + } + + void delete_node( node_ptr node ) { + size_type sz = calc_node_size(node->height()); + + // Destroy the node + node_allocator_traits::destroy(my_node_allocator, node); + // Deallocate the node + node_allocator_traits::deallocate(my_node_allocator, reinterpret_cast<std::uint8_t*>(node), sz); + } + + void delete_value_node( node_ptr node ) { + // Destroy the value inside the node + node_allocator_traits::destroy(my_node_allocator, node->storage()); + delete_node(node); + } + + node_ptr get_head() const { + return my_head_ptr.load(std::memory_order_acquire); + } + + node_ptr create_head_if_necessary() { + node_ptr current_head = get_head(); + if (current_head == nullptr) { + // Head node was not created - create it + node_ptr new_head = create_head_node(); + if (my_head_ptr.compare_exchange_strong(current_head, new_head)) { + current_head = new_head; + } else { + // If an other thread has already created the head node - destroy new_head + // current_head now points to the actual head node + delete_node(new_head); + } + } + __TBB_ASSERT(my_head_ptr.load(std::memory_order_relaxed) != nullptr, nullptr); + __TBB_ASSERT(current_head != nullptr, nullptr); + return current_head; + } + + static iterator get_iterator( const_iterator it ) { + return iterator(it.my_node_ptr); + } + + void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::true_type ) { + internal_move(std::move(other)); + } + + void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::false_type ) { + if (my_node_allocator == other.my_node_allocator) { + internal_move(std::move(other)); + } else { + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); + } + } + + void internal_swap_fields( concurrent_skip_list& other ) { + using std::swap; + swap_allocators(my_node_allocator, other.my_node_allocator); + swap(my_compare, other.my_compare); + swap(my_rng, other.my_rng); + + swap_atomics_relaxed(my_head_ptr, other.my_head_ptr); + swap_atomics_relaxed(my_size, other.my_size); + swap_atomics_relaxed(my_max_height, other.my_max_height); + } + + void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::true_type ) { + internal_swap_fields(other); + } + + void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::false_type ) { + __TBB_ASSERT(my_node_allocator == other.my_node_allocator, "Swapping with unequal allocators is not allowed"); + internal_swap_fields(other); + } + + node_allocator_type my_node_allocator; + key_compare my_compare; + random_level_generator_type my_rng; + std::atomic<list_node_type*> my_head_ptr; + std::atomic<size_type> my_size; + std::atomic<size_type> my_max_height; + + template<typename OtherTraits> + friend class concurrent_skip_list; +}; // class concurrent_skip_list + +template <typename Traits> +bool operator==( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + if (lhs.size() != rhs.size()) return false; 
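For context, this class template is consumed through the public wrappers declared in oneapi/tbb/concurrent_map.h and oneapi/tbb/concurrent_set.h (see the #error guard at the top of the header). The following usage sketch is written against that public interface; it assumes an installed oneTBB and is not part of this patch. Inserts and lookups may run concurrently, while the unsafe_* members need external synchronization.

#include <oneapi/tbb/concurrent_map.h>
#include <oneapi/tbb/parallel_for.h>
#include <string>

int main() {
    tbb::concurrent_map<int, std::string> m;

    // Thread-safe insertion from a parallel loop.
    tbb::parallel_for(0, 1000, [&](int i) {
        m.emplace(i, std::to_string(i));
    });

    // Thread-safe lookups.
    bool found = m.contains(42);
    auto eq = m.equal_range(42);   // at most one element for the unique map

    // Not thread-safe: must not run concurrently with other operations.
    m.unsafe_erase(42);

    return (found && eq.first != eq.second) ? 0 : 1;
}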
+#if _MSC_VER + // Passing "unchecked" iterators to std::equal with 3 parameters + // causes compiler warnings. + // The workaround is to use overload with 4 parameters, which is + // available since C++14 - minimally supported version on MSVC + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +#else + return std::equal(lhs.begin(), lhs.end(), rhs.begin()); +#endif +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Traits> +bool operator!=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(lhs == rhs); +} +#endif + +#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT +template <typename Traits> +tbb::detail::synthesized_three_way_result<typename Traits::value_type> +operator<=>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), + rhs.begin(), rhs.end(), + tbb::detail::synthesized_three_way_comparator{}); +} +#else +template <typename Traits> +bool operator<( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +template <typename Traits> +bool operator>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return rhs < lhs; +} + +template <typename Traits> +bool operator<=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(rhs < lhs); +} + +template <typename Traits> +bool operator>=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(lhs < rhs); +} +#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT + +// Generates a number from the interval [0, MaxLevel). +template <std::size_t MaxLevel> +class concurrent_geometric_level_generator { +public: + static constexpr std::size_t max_level = MaxLevel; + // TODO: modify the algorithm to accept other values of max_level + static_assert(max_level == 32, "Incompatible max_level for rng"); + + concurrent_geometric_level_generator() : engines(std::minstd_rand::result_type(time(nullptr))) {} + + std::size_t operator()() { + // +1 is required to pass at least 1 into log2 (log2(0) is undefined) + // -1 is required to have an ability to return 0 from the generator (max_level - log2(2^31) - 1) + std::size_t result = max_level - std::size_t(tbb::detail::log2(engines.local()() + 1)) - 1; + __TBB_ASSERT(result <= max_level, nullptr); + return result; + } + +private: + tbb::enumerable_thread_specific<std::minstd_rand> engines; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +#endif // __TBB_detail__concurrent_skip_list_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h index 3abcce2b29..b81169aaa1 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h @@ -1,1500 +1,1500 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__concurrent_unordered_base_H -#define __TBB_detail__concurrent_unordered_base_H - -#if !defined(__TBB_concurrent_unordered_map_H) && !defined(__TBB_concurrent_unordered_set_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "_range_common.h" -#include "_containers_helpers.h" -#include "_segment_table.h" -#include "_hash_compare.h" -#include "_allocator_traits.h" -#include "_node_handle.h" -#include "_assert.h" -#include "_utils.h" -#include "_exception.h" -#include <iterator> -#include <utility> -#include <functional> -#include <initializer_list> -#include <atomic> -#include <type_traits> -#include <memory> -#include <algorithm> - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(push) -#pragma warning(disable: 4127) // warning C4127: conditional expression is constant -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Traits> -class concurrent_unordered_base; - -template<typename Container, typename Value> -class solist_iterator { -private: - using node_ptr = typename Container::value_node_ptr; - template <typename T, typename Allocator> - friend class split_ordered_list; - template<typename M, typename V> - friend class solist_iterator; - template <typename Traits> - friend class concurrent_unordered_base; - template<typename M, typename T, typename U> - friend bool operator==( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); - template<typename M, typename T, typename U> - friend bool operator!=( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); -public: - using value_type = Value; - using difference_type = typename Container::difference_type; - using pointer = value_type*; - using reference = value_type&; - using iterator_category = std::forward_iterator_tag; - - solist_iterator() : my_node_ptr(nullptr) {} - solist_iterator( const solist_iterator<Container, typename Container::value_type>& other ) - : my_node_ptr(other.my_node_ptr) {} - - solist_iterator& operator=( const solist_iterator<Container, typename Container::value_type>& other ) { - my_node_ptr = other.my_node_ptr; - return *this; - } - - reference operator*() const { - return my_node_ptr->value(); - } - - pointer operator->() const { - return my_node_ptr->storage(); - } - - solist_iterator& operator++() { - auto next_node = my_node_ptr->next(); - while(next_node && next_node->is_dummy()) { - next_node = next_node->next(); - } - my_node_ptr = static_cast<node_ptr>(next_node); - return *this; - } - - solist_iterator operator++(int) { - solist_iterator tmp = *this; - ++*this; - return tmp; - } - -private: - solist_iterator( node_ptr pnode ) : my_node_ptr(pnode) {} - - node_ptr get_node_ptr() const { return my_node_ptr; } - - node_ptr my_node_ptr; -}; - -template<typename Solist, typename T, typename U> -bool operator==( const solist_iterator<Solist, T>& i, const solist_iterator<Solist, U>& j ) { - return i.my_node_ptr == j.my_node_ptr; -} - -template<typename Solist, typename T, typename U> -bool operator!=( const solist_iterator<Solist, T>& i, const 
solist_iterator<Solist, U>& j ) { - return i.my_node_ptr != j.my_node_ptr; -} - -template <typename SokeyType> -class list_node { -public: - using node_ptr = list_node*; - using sokey_type = SokeyType; - - list_node(sokey_type key) : my_next(nullptr), my_order_key(key) {} - - void init( sokey_type key ) { - my_order_key = key; - } - - sokey_type order_key() const { - return my_order_key; - } - - bool is_dummy() { - // The last bit of order key is unset for dummy nodes - return (my_order_key & 0x1) == 0; - } - - node_ptr next() const { - return my_next.load(std::memory_order_acquire); - } - - void set_next( node_ptr next_node ) { - my_next.store(next_node, std::memory_order_release); - } - - bool try_set_next( node_ptr expected_next, node_ptr new_next ) { - return my_next.compare_exchange_strong(expected_next, new_next); - } - -private: - std::atomic<node_ptr> my_next; - sokey_type my_order_key; -}; // class list_node - -template <typename ValueType, typename SokeyType> -class value_node : public list_node<SokeyType> -{ -public: - using base_type = list_node<SokeyType>; - using sokey_type = typename base_type::sokey_type; - using value_type = ValueType; - - value_node( sokey_type ord_key ) : base_type(ord_key) {} - ~value_node() {} - value_type* storage() { - return reinterpret_cast<value_type*>(&my_value); - } - - value_type& value() { - return *storage(); - } - -private: - using aligned_storage_type = typename std::aligned_storage<sizeof(value_type)>::type; - aligned_storage_type my_value; -}; // class value_node - -template <typename Traits> -class concurrent_unordered_base { - using self_type = concurrent_unordered_base<Traits>; - using traits_type = Traits; - using hash_compare_type = typename traits_type::hash_compare_type; - class unordered_segment_table; -public: - using value_type = typename traits_type::value_type; - using key_type = typename traits_type::key_type; - using allocator_type = typename traits_type::allocator_type; - -private: - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - // TODO: check assert conditions for different C++ standards - static_assert(std::is_same<typename allocator_traits_type::value_type, value_type>::value, - "value_type of the container must be the same as its allocator"); - using sokey_type = std::size_t; - -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using iterator = solist_iterator<self_type, value_type>; - using const_iterator = solist_iterator<self_type, const value_type>; - using local_iterator = iterator; - using const_local_iterator = const_iterator; - - using reference = value_type&; - using const_reference = const value_type&; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using hasher = typename hash_compare_type::hasher; - using key_equal = typename hash_compare_type::key_equal; - -private: - using list_node_type = list_node<sokey_type>; - using value_node_type = value_node<value_type, sokey_type>; - using node_ptr = list_node_type*; - using value_node_ptr = value_node_type*; - - using value_node_allocator_type = typename allocator_traits_type::template rebind_alloc<value_node_type>; - using node_allocator_type = typename allocator_traits_type::template rebind_alloc<list_node_type>; - - using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; - using value_node_allocator_traits = tbb::detail::allocator_traits<value_node_allocator_type>; - - static 
constexpr size_type round_up_to_power_of_two( size_type bucket_count ) { - return size_type(1) << size_type(tbb::detail::log2(uintptr_t(bucket_count == 0 ? 1 : bucket_count) * 2 - 1)); - } - - template <typename T> - using is_transparent = dependent_bool<has_transparent_key_equal<key_type, hasher, key_equal>, T>; -public: - using node_type = node_handle<key_type, value_type, value_node_type, allocator_type>; - - explicit concurrent_unordered_base( size_type bucket_count, const hasher& hash = hasher(), - const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) - : my_size(0), - my_bucket_count(round_up_to_power_of_two(bucket_count)), - my_max_load_factor(float(initial_max_load_factor)), - my_hash_compare(hash, equal), - my_head(sokey_type(0)), - my_segments(alloc) {} - - concurrent_unordered_base() : concurrent_unordered_base(initial_bucket_count) {} - - concurrent_unordered_base( size_type bucket_count, const allocator_type& alloc ) - : concurrent_unordered_base(bucket_count, hasher(), key_equal(), alloc) {} - - concurrent_unordered_base( size_type bucket_count, const hasher& hash, const allocator_type& alloc ) - : concurrent_unordered_base(bucket_count, hash, key_equal(), alloc) {} - - explicit concurrent_unordered_base( const allocator_type& alloc ) - : concurrent_unordered_base(initial_bucket_count, hasher(), key_equal(), alloc) {} - - template <typename InputIterator> - concurrent_unordered_base( InputIterator first, InputIterator last, - size_type bucket_count = initial_bucket_count, const hasher& hash = hasher(), - const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) - : concurrent_unordered_base(bucket_count, hash, equal, alloc) - { - insert(first, last); - } - - template <typename InputIterator> - concurrent_unordered_base( InputIterator first, InputIterator last, - size_type bucket_count, const allocator_type& alloc ) - : concurrent_unordered_base(first, last, bucket_count, hasher(), key_equal(), alloc) {} - - template <typename InputIterator> - concurrent_unordered_base( InputIterator first, InputIterator last, - size_type bucket_count, const hasher& hash, const allocator_type& alloc ) - : concurrent_unordered_base(first, last, bucket_count, hash, key_equal(), alloc) {} - - concurrent_unordered_base( const concurrent_unordered_base& other ) - : my_size(other.my_size.load(std::memory_order_relaxed)), - my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), - my_max_load_factor(other.my_max_load_factor), - my_hash_compare(other.my_hash_compare), - my_head(other.my_head.order_key()), - my_segments(other.my_segments) - { - try_call( [&] { - internal_copy(other); - } ).on_exception( [&] { - clear(); - }); - } - - concurrent_unordered_base( const concurrent_unordered_base& other, const allocator_type& alloc ) - : my_size(other.my_size.load(std::memory_order_relaxed)), - my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), - my_max_load_factor(other.my_max_load_factor), - my_hash_compare(other.my_hash_compare), - my_head(other.my_head.order_key()), - my_segments(other.my_segments, alloc) - { - try_call( [&] { - internal_copy(other); - } ).on_exception( [&] { - clear(); - }); - } - - concurrent_unordered_base( concurrent_unordered_base&& other ) - : my_size(other.my_size.load(std::memory_order_relaxed)), - my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), - my_max_load_factor(std::move(other.my_max_load_factor)), - my_hash_compare(std::move(other.my_hash_compare)), - 
my_head(other.my_head.order_key()), - my_segments(std::move(other.my_segments)) - { - move_content(std::move(other)); - } - - concurrent_unordered_base( concurrent_unordered_base&& other, const allocator_type& alloc ) - : my_size(other.my_size.load(std::memory_order_relaxed)), - my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), - my_max_load_factor(std::move(other.my_max_load_factor)), - my_hash_compare(std::move(other.my_hash_compare)), - my_head(other.my_head.order_key()), - my_segments(std::move(other.my_segments), alloc) - { - using is_always_equal = typename allocator_traits_type::is_always_equal; - internal_move_construct_with_allocator(std::move(other), alloc, is_always_equal()); - } - - concurrent_unordered_base( std::initializer_list<value_type> init, - size_type bucket_count = initial_bucket_count, - const hasher& hash = hasher(), const key_equal& equal = key_equal(), - const allocator_type& alloc = allocator_type() ) - : concurrent_unordered_base(init.begin(), init.end(), bucket_count, hash, equal, alloc) {} - - concurrent_unordered_base( std::initializer_list<value_type> init, - size_type bucket_count, const allocator_type& alloc ) - : concurrent_unordered_base(init, bucket_count, hasher(), key_equal(), alloc) {} - - concurrent_unordered_base( std::initializer_list<value_type> init, - size_type bucket_count, const hasher& hash, const allocator_type& alloc ) - : concurrent_unordered_base(init, bucket_count, hash, key_equal(), alloc) {} - - ~concurrent_unordered_base() { - internal_clear(); - } - - concurrent_unordered_base& operator=( const concurrent_unordered_base& other ) { - if (this != &other) { - clear(); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_max_load_factor = other.my_max_load_factor; - my_hash_compare = other.my_hash_compare; - my_segments = other.my_segments; - internal_copy(other); // TODO: guards for exceptions? 
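The constructors above normalize any requested bucket count to a power of two via round_up_to_power_of_two. A minimal standalone sketch of that rounding, assuming a plain-loop floor_log2 in place of tbb::detail::log2 (the helper names here are illustrative, not the header's):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Index of the highest set bit; stands in for tbb::detail::log2.
inline std::size_t floor_log2(std::uint64_t x) {
    std::size_t result = 0;
    while (x >>= 1) ++result;
    return result;
}

// Smallest power of two that is >= bucket_count (0 is treated as 1),
// mirroring round_up_to_power_of_two in the header above.
inline std::size_t round_up_pow2(std::size_t bucket_count) {
    return std::size_t(1) << floor_log2(std::uint64_t(bucket_count == 0 ? 1 : bucket_count) * 2 - 1);
}

int main() {
    assert(round_up_pow2(0) == 1);
    assert(round_up_pow2(5) == 8);
    assert(round_up_pow2(8) == 8);
    assert(round_up_pow2(9) == 16);
}

Keeping the bucket count a power of two lets the container map a hash to a bucket with a cheap modulo and lets buckets split cleanly when the count doubles.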
- } - return *this; - } - - concurrent_unordered_base& operator=( concurrent_unordered_base&& other ) noexcept(unordered_segment_table::is_noexcept_assignment) { - if (this != &other) { - clear(); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_max_load_factor = std::move(other.my_max_load_factor); - my_hash_compare = std::move(other.my_hash_compare); - my_segments = std::move(other.my_segments); - - using pocma_type = typename allocator_traits_type::propagate_on_container_move_assignment; - using is_always_equal = typename allocator_traits_type::is_always_equal; - internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); - } - return *this; - } - - concurrent_unordered_base& operator=( std::initializer_list<value_type> init ) { - clear(); - insert(init); - return *this; - } - - void swap( concurrent_unordered_base& other ) noexcept(unordered_segment_table::is_noexcept_swap) { - if (this != &other) { - using pocs_type = typename allocator_traits_type::propagate_on_container_swap; - using is_always_equal = typename allocator_traits_type::is_always_equal; - internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); - } - } - - allocator_type get_allocator() const noexcept { return my_segments.get_allocator(); } - - iterator begin() noexcept { return iterator(first_value_node(&my_head)); } - const_iterator begin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } - const_iterator cbegin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } - - iterator end() noexcept { return iterator(nullptr); } - const_iterator end() const noexcept { return const_iterator(nullptr); } - const_iterator cend() const noexcept { return const_iterator(nullptr); } - - __TBB_nodiscard bool empty() const noexcept { return size() == 0; } - size_type size() const noexcept { return my_size.load(std::memory_order_relaxed); } - size_type max_size() const noexcept { return allocator_traits_type::max_size(get_allocator()); } - - void clear() noexcept { - internal_clear(); - } - - std::pair<iterator, bool> insert( const value_type& value ) { - return internal_insert_value(value); - } - - std::pair<iterator, bool> insert( value_type&& value ) { - return internal_insert_value(std::move(value)); - } - - iterator insert( const_iterator, const value_type& value ) { - // Ignore hint - return insert(value).first; - } - - iterator insert( const_iterator, value_type&& value ) { - // Ignore hint - return insert(std::move(value)).first; - } - - template <typename InputIterator> - void insert( InputIterator first, InputIterator last ) { - for (; first != last; ++first) { - insert(*first); - } - } - - void insert( std::initializer_list<value_type> init ) { - insert(init.begin(), init.end()); - } - - std::pair<iterator, bool> insert( node_type&& nh ) { - if (!nh.empty()) { - value_node_ptr insert_node = node_handle_accessor::get_node_ptr(nh); - auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { - insert_node->init(order_key); - return insert_node; - }; - auto insert_result = internal_insert(insert_node->value(), init_node); - if (insert_result.inserted) { - // If the insertion succeeded - set node handle to the empty state - __TBB_ASSERT(insert_result.remaining_node == nullptr, - "internal_insert_node should not return the 
remaining node if the insertion succeeded"); - node_handle_accessor::deactivate(nh); - } - return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; - } - return {end(), false}; - } - - iterator insert( const_iterator, node_type&& nh ) { - // Ignore hint - return insert(std::move(nh)).first; - } - - template <typename... Args> - std::pair<iterator, bool> emplace( Args&&... args ) { - // Create a node with temporary order_key 0, which will be reinitialize - // in internal_insert after the hash calculation - value_node_ptr insert_node = create_node(0, std::forward<Args>(args)...); - - auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { - insert_node->init(order_key); - return insert_node; - }; - - auto insert_result = internal_insert(insert_node->value(), init_node); - - if (!insert_result.inserted) { - // If the insertion failed - destroy the node which was created - insert_node->init(split_order_key_regular(1)); - destroy_node(insert_node); - } - - return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; - } - - template <typename... Args> - iterator emplace_hint( const_iterator, Args&&... args ) { - // Ignore hint - return emplace(std::forward<Args>(args)...).first; - } - - iterator unsafe_erase( const_iterator pos ) { - return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); - } - - iterator unsafe_erase( iterator pos ) { - return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); - } - - iterator unsafe_erase( const_iterator first, const_iterator last ) { - while(first != last) { - first = unsafe_erase(first); - } - return iterator(first.get_node_ptr()); - } - - size_type unsafe_erase( const key_type& key ) { - return internal_erase_by_key(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value - && !std::is_convertible<K, const_iterator>::value - && !std::is_convertible<K, iterator>::value, - size_type>::type unsafe_erase( const K& key ) - { - return internal_erase_by_key(key); - } - - node_type unsafe_extract( const_iterator pos ) { - internal_extract(pos.get_node_ptr()); - return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); - } - - node_type unsafe_extract( iterator pos ) { - internal_extract(pos.get_node_ptr()); - return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); - } - - node_type unsafe_extract( const key_type& key ) { - iterator item = find(key); - return item == end() ? node_type() : unsafe_extract(item); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value - && !std::is_convertible<K, const_iterator>::value - && !std::is_convertible<K, iterator>::value, - node_type>::type unsafe_extract( const K& key ) - { - iterator item = find(key); - return item == end() ? node_type() : unsafe_extract(item); - } - - // Lookup functions - iterator find( const key_type& key ) { - value_node_ptr result = internal_find(key); - return result == nullptr ? end() : iterator(result); - } - - const_iterator find( const key_type& key ) const { - value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); - return result == nullptr ? end() : const_iterator(result); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { - value_node_ptr result = internal_find(key); - return result == nullptr ? 
end() : iterator(result); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { - value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); - return result == nullptr ? end() : const_iterator(result); - } - - std::pair<iterator, iterator> equal_range( const key_type& key ) { - auto result = internal_equal_range(key); - return std::make_pair(iterator(result.first), iterator(result.second)); - } - - std::pair<const_iterator, const_iterator> equal_range( const key_type& key ) const { - auto result = const_cast<self_type*>(this)->internal_equal_range(key); - return std::make_pair(const_iterator(result.first), const_iterator(result.second)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { - auto result = internal_equal_range(key); - return std::make_pair(iterator(result.first), iterator(result.second)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { - auto result = const_cast<self_type*>(this)->internal_equal_range(key); - return std::make_pair(iterator(result.first), iterator(result.second)); - } - - size_type count( const key_type& key ) const { - return internal_count(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { - return internal_count(key); - } - - bool contains( const key_type& key ) const { - return find(key) != end(); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { - return find(key) != end(); - } - - // Bucket interface - local_iterator unsafe_begin( size_type n ) { - return local_iterator(first_value_node(get_bucket(n))); - } - - const_local_iterator unsafe_begin( size_type n ) const { - auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); - return const_local_iterator(bucket_begin); - } - - const_local_iterator unsafe_cbegin( size_type n ) const { - auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); - return const_local_iterator(bucket_begin); - } - - local_iterator unsafe_end( size_type n ) { - size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); - return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : local_iterator(nullptr); - } - - const_local_iterator unsafe_end( size_type n ) const { - size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); - return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); - } - - const_local_iterator unsafe_cend( size_type n ) const { - size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); - return n != bucket_count - 1 ? 
unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); - } - - size_type unsafe_bucket_count() const { return my_bucket_count.load(std::memory_order_relaxed); } - - size_type unsafe_max_bucket_count() const { - return max_size(); - } - - size_type unsafe_bucket_size( size_type n ) const { - return size_type(std::distance(unsafe_begin(n), unsafe_end(n))); - } - - size_type unsafe_bucket( const key_type& key ) const { - return my_hash_compare(key) % my_bucket_count.load(std::memory_order_relaxed); - } - - // Hash policy - float load_factor() const { - return float(size() / float(my_bucket_count.load(std::memory_order_acquire))); - } - - float max_load_factor() const { return my_max_load_factor; } - - void max_load_factor( float mlf ) { - if (mlf != mlf || mlf < 0) { - tbb::detail::throw_exception(exception_id::invalid_load_factor); - } - my_max_load_factor = mlf; - } // TODO: unsafe? - - void rehash( size_type bucket_count ) { - size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); - if (current_bucket_count < bucket_count) { - // TODO: do we need do-while here? - my_bucket_count.compare_exchange_strong(current_bucket_count, round_up_to_power_of_two(bucket_count)); - } - } - - void reserve( size_type elements_count ) { - size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); - size_type necessary_bucket_count = current_bucket_count; - - do { - // TODO: Log2 seems useful here - while (necessary_bucket_count * max_load_factor() < elements_count) { - necessary_bucket_count <<= 1; - } - } while (current_bucket_count >= necessary_bucket_count || - !my_bucket_count.compare_exchange_strong(current_bucket_count, necessary_bucket_count)); - } - - // Observers - hasher hash_function() const { return my_hash_compare.hash_function(); } - key_equal key_eq() const { return my_hash_compare.key_eq(); } - - class const_range_type { - private: - const concurrent_unordered_base& my_instance; - node_ptr my_begin_node; // may be node* const - node_ptr my_end_node; - mutable node_ptr my_midpoint_node; - public: - using size_type = typename concurrent_unordered_base::size_type; - using value_type = typename concurrent_unordered_base::value_type; - using reference = typename concurrent_unordered_base::reference; - using difference_type = typename concurrent_unordered_base::difference_type; - using iterator = typename concurrent_unordered_base::const_iterator; - - bool empty() const { return my_begin_node == my_end_node; } - - bool is_divisible() const { - return my_midpoint_node != my_end_node; - } - - size_type grainsize() const { return 1; } - - const_range_type( const_range_type& range, split ) - : my_instance(range.my_instance), - my_begin_node(range.my_midpoint_node), - my_end_node(range.my_end_node) - { - range.my_end_node = my_begin_node; - __TBB_ASSERT(!empty(), "Splitting despite the range is not divisible"); - __TBB_ASSERT(!range.empty(), "Splitting despite the range is not divisible"); - set_midpoint(); - range.set_midpoint(); - } - - iterator begin() const { return iterator(my_instance.first_value_node(my_begin_node)); } - iterator end() const { return iterator(my_instance.first_value_node(my_end_node)); } - - const_range_type( const concurrent_unordered_base& table ) - : my_instance(table), my_begin_node(const_cast<node_ptr>(&table.my_head)), my_end_node(nullptr) - { - set_midpoint(); - } - private: - void set_midpoint() const { - if (my_begin_node == my_end_node) { - my_midpoint_node = my_end_node; - } else { - sokey_type 
invalid_key = ~sokey_type(0); - sokey_type begin_key = my_begin_node != nullptr ? my_begin_node->order_key() : invalid_key; - sokey_type end_key = my_end_node != nullptr ? my_end_node->order_key() : invalid_key; - - size_type mid_bucket = reverse_bits(begin_key + (end_key - begin_key) / 2) % - my_instance.my_bucket_count.load(std::memory_order_relaxed); - while( my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed) == nullptr) { - mid_bucket = my_instance.get_parent(mid_bucket); - } - if (reverse_bits(mid_bucket) > begin_key) { - // Found a dummy node between begin and end - my_midpoint_node = my_instance.first_value_node( - my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed)); - } else { - // Didn't find a dummy node between begin and end - my_midpoint_node = my_end_node; - } - } - } - }; // class const_range_type - - class range_type : public const_range_type { - public: - using iterator = typename concurrent_unordered_base::iterator; - using const_range_type::const_range_type; - - iterator begin() const { return iterator(const_range_type::begin().get_node_ptr()); } - iterator end() const { return iterator(const_range_type::end().get_node_ptr()); } - }; // class range_type - - // Parallel iteration - range_type range() { - return range_type(*this); - } - - const_range_type range() const { - return const_range_type(*this); - } -protected: - static constexpr bool allow_multimapping = traits_type::allow_multimapping; - -private: - static constexpr size_type initial_bucket_count = 8; - static constexpr float initial_max_load_factor = 4; // TODO: consider 1? - static constexpr size_type pointers_per_embedded_table = sizeof(size_type) * 8 - 1; - - class unordered_segment_table - : public segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table> - { - using self_type = unordered_segment_table; - using atomic_node_ptr = std::atomic<node_ptr>; - using base_type = segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table>; - using segment_type = typename base_type::segment_type; - using base_allocator_type = typename base_type::allocator_type; - - using segment_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_node_ptr>; - using segment_allocator_traits = tbb::detail::allocator_traits<segment_allocator_type>; - public: - // Segment table for unordered containers should not be extended in the wait- free implementation - static constexpr bool allow_table_extending = false; - static constexpr bool is_noexcept_assignment = std::is_nothrow_move_assignable<hasher>::value && - std::is_nothrow_move_assignable<key_equal>::value && - segment_allocator_traits::is_always_equal::value; - static constexpr bool is_noexcept_swap = tbb::detail::is_nothrow_swappable<hasher>::value && - tbb::detail::is_nothrow_swappable<key_equal>::value && - segment_allocator_traits::is_always_equal::value; - - // TODO: using base_type::base_type is not compiling on Windows and Intel Compiler - investigate - unordered_segment_table( const base_allocator_type& alloc = base_allocator_type() ) - : base_type(alloc) {} - - unordered_segment_table( const unordered_segment_table& ) = default; - - unordered_segment_table( const unordered_segment_table& other, const base_allocator_type& alloc ) - : base_type(other, alloc) {} - - unordered_segment_table( unordered_segment_table&& ) = default; - - unordered_segment_table( unordered_segment_table&& other, const base_allocator_type& alloc ) - 
: base_type(std::move(other), alloc) {} - - unordered_segment_table& operator=( const unordered_segment_table& ) = default; - - unordered_segment_table& operator=( unordered_segment_table&& ) = default; - - segment_type create_segment( typename base_type::segment_table_type, typename base_type::segment_index_type segment_index, size_type ) { - segment_allocator_type alloc(this->get_allocator()); - size_type seg_size = this->segment_size(segment_index); - segment_type new_segment = segment_allocator_traits::allocate(alloc, seg_size); - for (size_type i = 0; i != seg_size; ++i) { - segment_allocator_traits::construct(alloc, new_segment + i, nullptr); - } - return new_segment; - } - - // deallocate_segment is required by the segment_table base class, but - // in unordered, it is also necessary to call the destructor during deallocation - void deallocate_segment( segment_type address, size_type index ) { - destroy_segment(address, index); - } - - void destroy_segment( segment_type address, size_type index ) { - segment_allocator_type alloc(this->get_allocator()); - for (size_type i = 0; i != this->segment_size(index); ++i) { - segment_allocator_traits::destroy(alloc, address + i); - } - segment_allocator_traits::deallocate(alloc, address, this->segment_size(index)); - } - - - void copy_segment( size_type index, segment_type, segment_type to ) { - if (index == 0) { - // The first element in the first segment is embedded into the table (my_head) - // so the first pointer should not be stored here - // It would be stored during move ctor/assignment operation - to[1].store(nullptr, std::memory_order_relaxed); - } else { - for (size_type i = 0; i != this->segment_size(index); ++i) { - to[i].store(nullptr, std::memory_order_relaxed); - } - } - } - - void move_segment( size_type index, segment_type from, segment_type to ) { - if (index == 0) { - // The first element in the first segment is embedded into the table (my_head) - // so the first pointer should not be stored here - // It would be stored during move ctor/assignment operation - to[1].store(from[1].load(std::memory_order_relaxed), std::memory_order_relaxed); - } else { - for (size_type i = 0; i != this->segment_size(index); ++i) { - to[i].store(from[i].load(std::memory_order_relaxed), std::memory_order_relaxed); - from[i].store(nullptr, std::memory_order_relaxed); - } - } - } - - // allocate_long_table is required by the segment_table base class, but unused for unordered containers - typename base_type::segment_table_type allocate_long_table( const typename base_type::atomic_segment*, size_type ) { - __TBB_ASSERT(false, "This method should never been called"); - // TableType is a pointer - return nullptr; - } - - // destroy_elements is required by the segment_table base class, but unused for unordered containers - // this function call but do nothing - void destroy_elements() {} - }; // struct unordered_segment_table - - void internal_clear() { - // TODO: consider usefulness of two versions of clear() - with dummy nodes deallocation and without it - node_ptr next = my_head.next(); - node_ptr curr = next; - - my_head.set_next(nullptr); - - while (curr != nullptr) { - next = curr->next(); - destroy_node(curr); - curr = next; - } - - my_size.store(0, std::memory_order_relaxed); - my_segments.clear(); - } - - void destroy_node( node_ptr node ) { - if (node->is_dummy()) { - node_allocator_type dummy_node_allocator(my_segments.get_allocator()); - // Destroy the node - node_allocator_traits::destroy(dummy_node_allocator, node); - // Deallocate the 
memory - node_allocator_traits::deallocate(dummy_node_allocator, node, 1); - } else { - value_node_ptr val_node = static_cast<value_node_ptr>(node); - value_node_allocator_type value_node_allocator(my_segments.get_allocator()); - // Destroy the value - value_node_allocator_traits::destroy(value_node_allocator, val_node->storage()); - // Destroy the node - value_node_allocator_traits::destroy(value_node_allocator, val_node); - // Deallocate the memory - value_node_allocator_traits::deallocate(value_node_allocator, val_node, 1); - } - } - - struct internal_insert_return_type { - // If the insertion failed - the remaining_node points to the node, which was failed to insert - // This node can be allocated in process of insertion - value_node_ptr remaining_node; - // If the insertion failed - node_with_equal_key points to the node in the list with the - // key, equivalent to the inserted, otherwise it points to the node, which was inserted. - value_node_ptr node_with_equal_key; - // Insertion status - // NOTE: if it is true - remaining_node should be nullptr - bool inserted; - }; // struct internal_insert_return_type - - // Inserts the value into the split ordered list - template <typename ValueType> - std::pair<iterator, bool> internal_insert_value( ValueType&& value ) { - - auto create_value_node = [&value, this]( sokey_type order_key )->value_node_ptr { - return create_node(order_key, std::forward<ValueType>(value)); - }; - - auto insert_result = internal_insert(value, create_value_node); - - if (insert_result.remaining_node != nullptr) { - // If the insertion fails - destroy the node which was failed to insert if it exist - __TBB_ASSERT(!insert_result.inserted, - "remaining_node should be nullptr if the node was successfully inserted"); - destroy_node(insert_result.remaining_node); - } - - return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; - } - - // Inserts the node into the split ordered list - // Creates a node using the specified callback after the place for insertion was found - // Returns internal_insert_return_type object, where: - // - If the insertion succeeded: - // - remaining_node is nullptr - // - node_with_equal_key point to the inserted node - // - inserted is true - // - If the insertion failed: - // - remaining_node points to the node, that was failed to insert if it was created. 
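internal_insert, whose contract the comment block above spells out, publishes a new node with a compare-and-swap on the predecessor's next pointer and re-runs the search whenever another thread wins the race. A simplified, self-contained sketch of that retry pattern on a sorted list of atomic links (the types and names here are illustrative stand-ins, not the header's):

#include <atomic>

struct Node {
    int key;
    std::atomic<Node*> next{nullptr};
};

// Insert new_node after head keeping keys sorted; returns false if an equal
// key is already present (mimicking the non-multimap path described above).
bool cas_insert(Node* head, Node* new_node) {
    for (;;) {
        Node* prev = head;
        Node* curr = prev->next.load(std::memory_order_acquire);
        // Search phase: advance while existing keys are smaller than the new key.
        while (curr && curr->key < new_node->key) {
            prev = curr;
            curr = curr->next.load(std::memory_order_acquire);
        }
        if (curr && curr->key == new_node->key) return false;  // key already present
        // Publish phase: link new_node to the observed successor, then try to
        // swing prev->next from that successor to new_node.
        new_node->next.store(curr, std::memory_order_relaxed);
        if (prev->next.compare_exchange_strong(curr, new_node)) return true;
        // CAS failed: another thread changed prev->next; restart the search.
    }
}

Because erasure in the real container is "unsafe" (not allowed concurrently with other operations), this insert-only CAS loop gets by without marked pointers or node reclamation schemes.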
- // nullptr if the node was not created, because the requested key was already - // presented in the list - // - node_with_equal_key point to the element in the list with the key, equivalent to - // to the requested key - // - inserted is false - template <typename ValueType, typename CreateInsertNode> - internal_insert_return_type internal_insert( ValueType&& value, CreateInsertNode create_insert_node ) { - static_assert(std::is_same<typename std::decay<ValueType>::type, value_type>::value, - "Incorrect type in internal_insert"); - const key_type& key = traits_type::get_key(value); - sokey_type hash_key = sokey_type(my_hash_compare(key)); - - sokey_type order_key = split_order_key_regular(hash_key); - node_ptr prev = prepare_bucket(hash_key); - __TBB_ASSERT(prev != nullptr, "Invalid head node"); - - auto search_result = search_after(prev, order_key, key); - - if (search_result.second) { - return internal_insert_return_type{ nullptr, search_result.first, false }; - } - - value_node_ptr new_node = create_insert_node(order_key); - node_ptr curr = search_result.first; - - while (!try_insert(prev, new_node, curr)) { - search_result = search_after(prev, order_key, key); - if (search_result.second) { - return internal_insert_return_type{ new_node, search_result.first, false }; - } - curr = search_result.first; - } - - auto sz = my_size.fetch_add(1); - adjust_table_size(sz + 1, my_bucket_count.load(std::memory_order_acquire)); - return internal_insert_return_type{ nullptr, static_cast<value_node_ptr>(new_node), true }; - } - - // Searches the node with the key, equivalent to key with requested order key after the node prev - // Returns the existing node and true if the node is already in the list - // Returns the first node with the order key, greater than requested and false if the node is not presented in the list - std::pair<value_node_ptr, bool> search_after( node_ptr& prev, sokey_type order_key, const key_type& key ) { - // NOTE: static_cast<value_node_ptr>(curr) should be done only after we would ensure - // that the node is not a dummy node - - node_ptr curr = prev->next(); - - while (curr != nullptr && (curr->order_key() < order_key || - (curr->order_key() == order_key && !my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)))) - { - prev = curr; - curr = curr->next(); - } - - if (curr != nullptr && curr->order_key() == order_key && !allow_multimapping) { - return { static_cast<value_node_ptr>(curr), true }; - } - return { static_cast<value_node_ptr>(curr), false }; - } - - void adjust_table_size( size_type total_elements, size_type current_size ) { - // Grow the table by a factor of 2 if possible and needed - if ( (float(total_elements) / float(current_size)) > my_max_load_factor ) { - // Double the size of the hash only if size hash not changed in between loads - my_bucket_count.compare_exchange_strong(current_size, 2u * current_size); - } - } - - node_ptr insert_dummy_node( node_ptr parent_dummy_node, sokey_type order_key ) { - node_ptr prev_node = parent_dummy_node; - - node_ptr dummy_node = create_dummy_node(order_key); - node_ptr next_node; - - do { - next_node = prev_node->next(); - // Move forward through the list while the order key is less than requested - while (next_node != nullptr && next_node->order_key() < order_key) { - prev_node = next_node; - next_node = next_node->next(); - } - - if (next_node != nullptr && next_node->order_key() == order_key) { - // Another dummy node with the same order key was inserted by another thread - // Destroy 
the node and exit - destroy_node(dummy_node); - return next_node; - } - } while (!try_insert(prev_node, dummy_node, next_node)); - - return dummy_node; - } - - // Try to insert a node between prev_node and expected next - // If the next is not equal to expected next - return false - static bool try_insert( node_ptr prev_node, node_ptr new_node, node_ptr current_next_node ) { - new_node->set_next(current_next_node); - return prev_node->try_set_next(current_next_node, new_node); - } - - // Returns the bucket, associated with the hash_key - node_ptr prepare_bucket( sokey_type hash_key ) { - size_type bucket = hash_key % my_bucket_count.load(std::memory_order_acquire); - return get_bucket(bucket); - } - - // Initialize the corresponding bucket if it is not initialized - node_ptr get_bucket( size_type bucket_index ) { - if (my_segments[bucket_index].load(std::memory_order_acquire) == nullptr) { - init_bucket(bucket_index); - } - return my_segments[bucket_index].load(std::memory_order_acquire); - } - - void init_bucket( size_type bucket ) { - if (bucket == 0) { - // Atomicaly store the first bucket into my_head - node_ptr disabled = nullptr; - my_segments[0].compare_exchange_strong(disabled, &my_head); - return; - } - - size_type parent_bucket = get_parent(bucket); - - while (my_segments[parent_bucket].load(std::memory_order_acquire) == nullptr) { - // Initialize all of the parent buckets - init_bucket(parent_bucket); - } - - __TBB_ASSERT(my_segments[parent_bucket].load(std::memory_order_acquire) != nullptr, "Parent bucket should be initialized"); - node_ptr parent = my_segments[parent_bucket].load(std::memory_order_acquire); - - // Insert dummy node into the list - node_ptr dummy_node = insert_dummy_node(parent, split_order_key_dummy(bucket)); - // TODO: consider returning pair<node_ptr, bool> to avoid store operation if the bucket was stored by an other thread - // or move store to insert_dummy_node - // Add dummy_node into the segment table - my_segments[bucket].store(dummy_node, std::memory_order_release); - } - - node_ptr create_dummy_node( sokey_type order_key ) { - node_allocator_type dummy_node_allocator(my_segments.get_allocator()); - node_ptr dummy_node = node_allocator_traits::allocate(dummy_node_allocator, 1); - node_allocator_traits::construct(dummy_node_allocator, dummy_node, order_key); - return dummy_node; - } - - template <typename... Args> - value_node_ptr create_node( sokey_type order_key, Args&&... 
args ) { - value_node_allocator_type value_node_allocator(my_segments.get_allocator()); - // Allocate memory for the value_node - value_node_ptr new_node = value_node_allocator_traits::allocate(value_node_allocator, 1); - // Construct the node - value_node_allocator_traits::construct(value_node_allocator, new_node, order_key); - - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard([&] { - value_node_allocator_traits::destroy(value_node_allocator, new_node); - value_node_allocator_traits::deallocate(value_node_allocator, new_node, 1); - }); - - // Construct the value in the node - value_node_allocator_traits::construct(value_node_allocator, new_node->storage(), std::forward<Args>(args)...); - value_guard.dismiss(); - return new_node; - } - - value_node_ptr first_value_node( node_ptr first_node ) const { - while (first_node != nullptr && first_node->is_dummy()) { - first_node = first_node->next(); - } - return static_cast<value_node_ptr>(first_node); - } - - // Unsafe method, which removes the node from the list and returns the next node - node_ptr internal_erase( value_node_ptr node_to_erase ) { - __TBB_ASSERT(node_to_erase != nullptr, "Invalid iterator for erase"); - node_ptr next_node = node_to_erase->next(); - internal_extract(node_to_erase); - destroy_node(node_to_erase); - return next_node; - } - - template <typename K> - size_type internal_erase_by_key( const K& key ) { - // TODO: consider reimplementation without equal_range - it is not effective to perform lookup over a bucket - // for each unsafe_erase call - auto eq_range = equal_range(key); - size_type erased_count = 0; - - for (auto it = eq_range.first; it != eq_range.second;) { - it = unsafe_erase(it); - ++erased_count; - } - return erased_count; - } - - // Unsafe method, which extracts the node from the list - void internal_extract( value_node_ptr node_to_extract ) { - const key_type& key = traits_type::get_key(node_to_extract->value()); - sokey_type hash_key = sokey_type(my_hash_compare(key)); - - node_ptr prev_node = prepare_bucket(hash_key); - - for (node_ptr node = prev_node->next(); node != nullptr; prev_node = node, node = node->next()) { - if (node == node_to_extract) { - unlink_node(prev_node, node, node_to_extract->next()); - my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - return; - } - __TBB_ASSERT(node->order_key() <= node_to_extract->order_key(), - "node, which is going to be extracted should be presented in the list"); - } - } - -protected: - template <typename SourceType> - void internal_merge( SourceType&& source ) { - static_assert(std::is_same<node_type, typename std::decay<SourceType>::type::node_type>::value, - "Incompatible containers cannot be merged"); - - for (node_ptr source_prev = &source.my_head; source_prev->next() != nullptr;) { - if (!source_prev->next()->is_dummy()) { - value_node_ptr curr = static_cast<value_node_ptr>(source_prev->next()); - // If the multimapping is allowed, or the key is not presented - // in the *this container - extract the node from the list - if (allow_multimapping || !contains(traits_type::get_key(curr->value()))) { - node_ptr next_node = curr->next(); - source.unlink_node(source_prev, curr, next_node); - - // Remember the old order key - sokey_type old_order_key = curr->order_key(); - - // Node handle with curr cannot be used directly in insert call, because - // the destructor of node_type will destroy curr - node_type curr_node = 
node_handle_accessor::construct<node_type>(curr); - - // If the insertion fails - return ownership of the node to the source - if (!insert(std::move(curr_node)).second) { - __TBB_ASSERT(!allow_multimapping, "Insertion should succeed for multicontainer"); - __TBB_ASSERT(source_prev->next() == next_node, - "Concurrent operations with the source container in merge are prohibited"); - - // Initialize the node with the old order key, because the order key - // can change during the insertion - curr->init(old_order_key); - __TBB_ASSERT(old_order_key >= source_prev->order_key() && - (next_node == nullptr || old_order_key <= next_node->order_key()), - "Wrong nodes order in the source container"); - // Merge is unsafe for source container, so the insertion back can be done without compare_exchange - curr->set_next(next_node); - source_prev->set_next(curr); - source_prev = curr; - node_handle_accessor::deactivate(curr_node); - } else { - source.my_size.fetch_sub(1, std::memory_order_relaxed); - } - } else { - source_prev = curr; - } - } else { - source_prev = source_prev->next(); - } - } - } - -private: - // Unsafe method, which unlinks the node between prev and next - void unlink_node( node_ptr prev_node, node_ptr node_to_unlink, node_ptr next_node ) { - __TBB_ASSERT(prev_node->next() == node_to_unlink && - node_to_unlink->next() == next_node, - "erasing and extracting nodes from the containers are unsafe in concurrent mode"); - prev_node->set_next(next_node); - node_to_unlink->set_next(nullptr); - } - - template <typename K> - value_node_ptr internal_find( const K& key ) { - sokey_type hash_key = sokey_type(my_hash_compare(key)); - sokey_type order_key = split_order_key_regular(hash_key); - - node_ptr curr = prepare_bucket(hash_key); - - while (curr != nullptr) { - if (curr->order_key() > order_key) { - // If the order key is greater than the requested order key, - // the element is not in the hash table - return nullptr; - } else if (curr->order_key() == order_key && - my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { - // The fact that order keys match does not mean that the element is found. - // Key function comparison has to be performed to check whether this is the - // right element. If not, keep searching while order key is the same. 
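The comment block ending here describes the lookup strategy: a matching order key only narrows the search, and the user-supplied equality predicate makes the final call, since distinct keys can share a hash and therefore an order key. A condensed sketch of that loop over a simplified node list (illustrative types; the dummy flag and plain == stand in for the order-key low bit and my_hash_compare's key equality):

#include <cstdint>

struct FindNode {
    std::uint64_t order_key;  // bit-reversed hash; low bit set for value nodes
    int key;                  // payload key (meaningful for value nodes only)
    bool dummy;               // true for bucket-boundary dummy nodes
    FindNode* next;
};

// Walk the split-ordered list starting from the bucket's dummy node.
FindNode* find_in_list(FindNode* bucket_head, std::uint64_t order_key, int key) {
    for (FindNode* curr = bucket_head; curr != nullptr; curr = curr->next) {
        if (curr->order_key > order_key) return nullptr;   // walked past the slot: absent
        if (curr->order_key == order_key && !curr->dummy && curr->key == key)
            return curr;                                   // hash matched and keys compare equal
    }
    return nullptr;
}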
- return static_cast<value_node_ptr>(curr); - } - curr = curr->next(); - } - - return nullptr; - } - - template <typename K> - std::pair<value_node_ptr, value_node_ptr> internal_equal_range( const K& key ) { - sokey_type hash_key = sokey_type(my_hash_compare(key)); - sokey_type order_key = split_order_key_regular(hash_key); - - node_ptr curr = prepare_bucket(hash_key); - - while (curr != nullptr) { - if (curr->order_key() > order_key) { - // If the order key is greater than the requested order key, - // the element is not in the hash table - return std::make_pair(nullptr, nullptr); - } else if (curr->order_key() == order_key && - my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { - value_node_ptr first = static_cast<value_node_ptr>(curr); - node_ptr last = first; - do { - last = last->next(); - } while (allow_multimapping && last != nullptr && !last->is_dummy() && - my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(last)->value()), key)); - return std::make_pair(first, first_value_node(last)); - } - curr = curr->next(); - } - return {nullptr, nullptr}; - } - - template <typename K> - size_type internal_count( const K& key ) const { - if (allow_multimapping) { - // TODO: consider reimplementing the internal_equal_range with elements counting to avoid std::distance - auto eq_range = equal_range(key); - return std::distance(eq_range.first, eq_range.second); - } else { - return contains(key) ? 1 : 0; - } - } - - void internal_copy( const concurrent_unordered_base& other ) { - node_ptr last_node = &my_head; - my_segments[0].store(&my_head, std::memory_order_relaxed); - - for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { - node_ptr new_node; - if (!node->is_dummy()) { - // The node in the right table contains a value - new_node = create_node(node->order_key(), static_cast<value_node_ptr>(node)->value()); - } else { - // The node in the right table is a dummy node - new_node = create_dummy_node(node->order_key()); - my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); - } - - last_node->set_next(new_node); - last_node = new_node; - } - } - - void internal_move( concurrent_unordered_base&& other ) { - node_ptr last_node = &my_head; - my_segments[0].store(&my_head, std::memory_order_relaxed); - - for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { - node_ptr new_node; - if (!node->is_dummy()) { - // The node in the right table contains a value - new_node = create_node(node->order_key(), std::move(static_cast<value_node_ptr>(node)->value())); - } else { - // TODO: do we need to destroy a dummy node in the right container? 
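Lookup, equal_range, and insertion all derive their position in the list from a split order key: the hash is bit-reversed, then the low bit is set for real elements (split_order_key_regular, defined near the end of this class) and cleared for bucket dummy nodes (split_order_key_dummy), so each bucket's dummy sorts just before the elements hashed into it. A small sketch of that scheme with a naive reverse_bits stand-in (tbb::detail::reverse_bits itself is not shown in this header):

#include <cstdint>

// Naive full-width bit reversal of a 64-bit key; stands in for
// tbb::detail::reverse_bits.
inline std::uint64_t reverse_bits64(std::uint64_t x) {
    std::uint64_t r = 0;
    for (int i = 0; i < 64; ++i) {
        r = (r << 1) | (x & 1);
        x >>= 1;
    }
    return r;
}

// Regular elements: reversed hash with the low bit set.
inline std::uint64_t order_key_regular(std::uint64_t hash) {
    return reverse_bits64(hash) | 0x1;
}

// Bucket dummy nodes: reversed bucket index with the low bit cleared.
inline std::uint64_t order_key_dummy(std::uint64_t bucket) {
    return reverse_bits64(bucket) & ~std::uint64_t(0x1);
}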
- // The node in the right table is a dummy_node - new_node = create_dummy_node(node->order_key()); - my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); - } - - last_node->set_next(new_node); - last_node = new_node; - } - } - - void move_content( concurrent_unordered_base&& other ) { - // NOTE: allocators should be equal - my_head.set_next(other.my_head.next()); - other.my_head.set_next(nullptr); - my_segments[0].store(&my_head, std::memory_order_relaxed); - - other.my_bucket_count.store(initial_bucket_count, std::memory_order_relaxed); - other.my_max_load_factor = initial_max_load_factor; - other.my_size.store(0, std::memory_order_relaxed); - } - - void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type&, - /*is_always_equal = */std::true_type ) { - // Allocators are always equal - no need to compare for equality - move_content(std::move(other)); - } - - void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type& alloc, - /*is_always_equal = */std::false_type ) { - // Allocators are not always equal - if (alloc == other.my_segments.get_allocator()) { - move_content(std::move(other)); - } else { - try_call( [&] { - internal_move(std::move(other)); - } ).on_exception( [&] { - clear(); - }); - } - } - - // Move assigns the hash table to other is any instances of allocator_type are always equal - // or propagate_on_container_move_assignment is true - void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::true_type ) { - move_content(std::move(other)); - } - - // Move assigns the hash table to other is any instances of allocator_type are not always equal - // and propagate_on_container_move_assignment is false - void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::false_type ) { - if (my_segments.get_allocator() == other.my_segments.get_allocator()) { - move_content(std::move(other)); - } else { - // TODO: guards for exceptions - internal_move(std::move(other)); - } - } - - void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::true_type ) { - internal_swap_fields(other); - } - - void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::false_type ) { - __TBB_ASSERT(my_segments.get_allocator() == other.my_segments.get_allocator(), - "Swapping with unequal allocators is not allowed"); - internal_swap_fields(other); - } - - void internal_swap_fields( concurrent_unordered_base& other ) { - node_ptr first_node = my_head.next(); - my_head.set_next(other.my_head.next()); - other.my_head.set_next(first_node); - - size_type current_size = my_size.load(std::memory_order_relaxed); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_size.store(current_size, std::memory_order_relaxed); - - size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); - my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_bucket_count.store(bucket_count, std::memory_order_relaxed); - - using std::swap; - swap(my_max_load_factor, other.my_max_load_factor); - swap(my_hash_compare, other.my_hash_compare); - my_segments.swap(other.my_segments); - - // swap() method from segment table swaps all of the segments including the first segment - // We should restore it to my_head. 
Without it the first segment of the container will point - // to other.my_head. - my_segments[0].store(&my_head, std::memory_order_relaxed); - other.my_segments[0].store(&other.my_head, std::memory_order_relaxed); - } - - // A regular order key has its original hash value reversed and the last bit set - static constexpr sokey_type split_order_key_regular( sokey_type hash ) { - return reverse_bits(hash) | 0x1; - } - - // A dummy order key has its original hash value reversed and the last bit unset - static constexpr sokey_type split_order_key_dummy( sokey_type hash ) { - return reverse_bits(hash) & ~sokey_type(0x1); - } - - size_type get_parent( size_type bucket ) const { - // Unset bucket's most significant turned-on bit - __TBB_ASSERT(bucket != 0, "Unable to get_parent of the bucket 0"); - size_type msb = tbb::detail::log2(bucket); - return bucket & ~(size_type(1) << msb); - } - - size_type get_next_bucket_index( size_type bucket ) const { - size_type bits = tbb::detail::log2(my_bucket_count.load(std::memory_order_relaxed)); - size_type reversed_next = reverse_n_bits(bucket, bits) + 1; - return reverse_n_bits(reversed_next, bits); - } - - std::atomic<size_type> my_size; - std::atomic<size_type> my_bucket_count; - float my_max_load_factor; - hash_compare_type my_hash_compare; - - list_node_type my_head; // Head node for split ordered list - unordered_segment_table my_segments; // Segment table of pointers to nodes - - template <typename Container, typename Value> - friend class solist_iterator; - - template <typename OtherTraits> - friend class concurrent_unordered_base; -}; // class concurrent_unordered_base - -template <typename Traits> -bool operator==( const concurrent_unordered_base<Traits>& lhs, - const concurrent_unordered_base<Traits>& rhs ) { - if (&lhs == &rhs) { return true; } - if (lhs.size() != rhs.size()) { return false; } - -#if _MSC_VER - // Passing "unchecked" iterators to std::permutation with 3 parameters - // causes compiler warnings. - // The workaround is to use overload with 4 parameters, which is - // available since C++14 - minimally supported version on MSVC - return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); -#else - return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin()); -#endif -} - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template <typename Traits> -bool operator!=( const concurrent_unordered_base<Traits>& lhs, - const concurrent_unordered_base<Traits>& rhs ) { - return !(lhs == rhs); -} -#endif - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(pop) // warning 4127 is back -#endif - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__concurrent_unordered_base_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__concurrent_unordered_base_H +#define __TBB_detail__concurrent_unordered_base_H + +#if !defined(__TBB_concurrent_unordered_map_H) && !defined(__TBB_concurrent_unordered_set_H) +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_range_common.h" +#include "_containers_helpers.h" +#include "_segment_table.h" +#include "_hash_compare.h" +#include "_allocator_traits.h" +#include "_node_handle.h" +#include "_assert.h" +#include "_utils.h" +#include "_exception.h" +#include <iterator> +#include <utility> +#include <functional> +#include <initializer_list> +#include <atomic> +#include <type_traits> +#include <memory> +#include <algorithm> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Traits> +class concurrent_unordered_base; + +template<typename Container, typename Value> +class solist_iterator { +private: + using node_ptr = typename Container::value_node_ptr; + template <typename T, typename Allocator> + friend class split_ordered_list; + template<typename M, typename V> + friend class solist_iterator; + template <typename Traits> + friend class concurrent_unordered_base; + template<typename M, typename T, typename U> + friend bool operator==( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); + template<typename M, typename T, typename U> + friend bool operator!=( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); +public: + using value_type = Value; + using difference_type = typename Container::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::forward_iterator_tag; + + solist_iterator() : my_node_ptr(nullptr) {} + solist_iterator( const solist_iterator<Container, typename Container::value_type>& other ) + : my_node_ptr(other.my_node_ptr) {} + + solist_iterator& operator=( const solist_iterator<Container, typename Container::value_type>& other ) { + my_node_ptr = other.my_node_ptr; + return *this; + } + + reference operator*() const { + return my_node_ptr->value(); + } + + pointer operator->() const { + return my_node_ptr->storage(); + } + + solist_iterator& operator++() { + auto next_node = my_node_ptr->next(); + while(next_node && next_node->is_dummy()) { + next_node = next_node->next(); + } + my_node_ptr = static_cast<node_ptr>(next_node); + return *this; + } + + solist_iterator operator++(int) { + solist_iterator tmp = *this; + ++*this; + return tmp; + } + +private: + solist_iterator( node_ptr pnode ) : my_node_ptr(pnode) {} + + node_ptr get_node_ptr() const { return my_node_ptr; } + + node_ptr my_node_ptr; +}; + +template<typename Solist, typename T, typename U> +bool operator==( const solist_iterator<Solist, T>& i, const solist_iterator<Solist, U>& j ) { + return i.my_node_ptr == j.my_node_ptr; +} + +template<typename Solist, typename T, typename U> +bool operator!=( const solist_iterator<Solist, T>& i, const solist_iterator<Solist, U>& j ) { + return i.my_node_ptr != j.my_node_ptr; +} + +template <typename SokeyType> +class list_node { +public: + using node_ptr = list_node*; + using sokey_type = SokeyType; + + list_node(sokey_type key) : my_next(nullptr), my_order_key(key) {} + + void init( sokey_type key ) { + my_order_key = key; + } + + sokey_type order_key() const { + return my_order_key; + } + + bool is_dummy() { + 
// The last bit of order key is unset for dummy nodes + return (my_order_key & 0x1) == 0; + } + + node_ptr next() const { + return my_next.load(std::memory_order_acquire); + } + + void set_next( node_ptr next_node ) { + my_next.store(next_node, std::memory_order_release); + } + + bool try_set_next( node_ptr expected_next, node_ptr new_next ) { + return my_next.compare_exchange_strong(expected_next, new_next); + } + +private: + std::atomic<node_ptr> my_next; + sokey_type my_order_key; +}; // class list_node + +template <typename ValueType, typename SokeyType> +class value_node : public list_node<SokeyType> +{ +public: + using base_type = list_node<SokeyType>; + using sokey_type = typename base_type::sokey_type; + using value_type = ValueType; + + value_node( sokey_type ord_key ) : base_type(ord_key) {} + ~value_node() {} + value_type* storage() { + return reinterpret_cast<value_type*>(&my_value); + } + + value_type& value() { + return *storage(); + } + +private: + using aligned_storage_type = typename std::aligned_storage<sizeof(value_type)>::type; + aligned_storage_type my_value; +}; // class value_node + +template <typename Traits> +class concurrent_unordered_base { + using self_type = concurrent_unordered_base<Traits>; + using traits_type = Traits; + using hash_compare_type = typename traits_type::hash_compare_type; + class unordered_segment_table; +public: + using value_type = typename traits_type::value_type; + using key_type = typename traits_type::key_type; + using allocator_type = typename traits_type::allocator_type; + +private: + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + // TODO: check assert conditions for different C++ standards + static_assert(std::is_same<typename allocator_traits_type::value_type, value_type>::value, + "value_type of the container must be the same as its allocator"); + using sokey_type = std::size_t; + +public: + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + using iterator = solist_iterator<self_type, value_type>; + using const_iterator = solist_iterator<self_type, const value_type>; + using local_iterator = iterator; + using const_local_iterator = const_iterator; + + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using hasher = typename hash_compare_type::hasher; + using key_equal = typename hash_compare_type::key_equal; + +private: + using list_node_type = list_node<sokey_type>; + using value_node_type = value_node<value_type, sokey_type>; + using node_ptr = list_node_type*; + using value_node_ptr = value_node_type*; + + using value_node_allocator_type = typename allocator_traits_type::template rebind_alloc<value_node_type>; + using node_allocator_type = typename allocator_traits_type::template rebind_alloc<list_node_type>; + + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + using value_node_allocator_traits = tbb::detail::allocator_traits<value_node_allocator_type>; + + static constexpr size_type round_up_to_power_of_two( size_type bucket_count ) { + return size_type(1) << size_type(tbb::detail::log2(uintptr_t(bucket_count == 0 ? 
1 : bucket_count) * 2 - 1)); + } + + template <typename T> + using is_transparent = dependent_bool<has_transparent_key_equal<key_type, hasher, key_equal>, T>; +public: + using node_type = node_handle<key_type, value_type, value_node_type, allocator_type>; + + explicit concurrent_unordered_base( size_type bucket_count, const hasher& hash = hasher(), + const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) + : my_size(0), + my_bucket_count(round_up_to_power_of_two(bucket_count)), + my_max_load_factor(float(initial_max_load_factor)), + my_hash_compare(hash, equal), + my_head(sokey_type(0)), + my_segments(alloc) {} + + concurrent_unordered_base() : concurrent_unordered_base(initial_bucket_count) {} + + concurrent_unordered_base( size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(bucket_count, hasher(), key_equal(), alloc) {} + + concurrent_unordered_base( size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(bucket_count, hash, key_equal(), alloc) {} + + explicit concurrent_unordered_base( const allocator_type& alloc ) + : concurrent_unordered_base(initial_bucket_count, hasher(), key_equal(), alloc) {} + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count = initial_bucket_count, const hasher& hash = hasher(), + const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) + : concurrent_unordered_base(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(first, last, bucket_count, hasher(), key_equal(), alloc) {} + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(first, last, bucket_count, hash, key_equal(), alloc) {} + + concurrent_unordered_base( const concurrent_unordered_base& other ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(other.my_max_load_factor), + my_hash_compare(other.my_hash_compare), + my_head(other.my_head.order_key()), + my_segments(other.my_segments) + { + try_call( [&] { + internal_copy(other); + } ).on_exception( [&] { + clear(); + }); + } + + concurrent_unordered_base( const concurrent_unordered_base& other, const allocator_type& alloc ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(other.my_max_load_factor), + my_hash_compare(other.my_hash_compare), + my_head(other.my_head.order_key()), + my_segments(other.my_segments, alloc) + { + try_call( [&] { + internal_copy(other); + } ).on_exception( [&] { + clear(); + }); + } + + concurrent_unordered_base( concurrent_unordered_base&& other ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(std::move(other.my_max_load_factor)), + my_hash_compare(std::move(other.my_hash_compare)), + my_head(other.my_head.order_key()), + my_segments(std::move(other.my_segments)) + { + move_content(std::move(other)); + } + + concurrent_unordered_base( 
concurrent_unordered_base&& other, const allocator_type& alloc ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(std::move(other.my_max_load_factor)), + my_hash_compare(std::move(other.my_hash_compare)), + my_head(other.my_head.order_key()), + my_segments(std::move(other.my_segments), alloc) + { + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_construct_with_allocator(std::move(other), alloc, is_always_equal()); + } + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count = initial_bucket_count, + const hasher& hash = hasher(), const key_equal& equal = key_equal(), + const allocator_type& alloc = allocator_type() ) + : concurrent_unordered_base(init.begin(), init.end(), bucket_count, hash, equal, alloc) {} + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(init, bucket_count, hasher(), key_equal(), alloc) {} + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(init, bucket_count, hash, key_equal(), alloc) {} + + ~concurrent_unordered_base() { + internal_clear(); + } + + concurrent_unordered_base& operator=( const concurrent_unordered_base& other ) { + if (this != &other) { + clear(); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_max_load_factor = other.my_max_load_factor; + my_hash_compare = other.my_hash_compare; + my_segments = other.my_segments; + internal_copy(other); // TODO: guards for exceptions? 
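// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// The copy constructors above guard internal_copy() with an internal
// try_call(...).on_exception(...) helper so that a failed copy leaves the table
// empty; the TODO on the preceding line observes that copy assignment has no such
// guard yet. The same rollback idiom in plain C++, for a hypothetical container
// type `Table` with copy_from() and clear() (both names are assumptions):
template <typename Table>
void copy_with_rollback( Table& dst, const Table& src ) {
    try {
        dst.copy_from(src);   // may throw while allocating/constructing nodes
    } catch (...) {
        dst.clear();          // roll back to an empty, destructible state
        throw;                // propagate the original exception to the caller
    }
}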
+ } + return *this; + } + + concurrent_unordered_base& operator=( concurrent_unordered_base&& other ) noexcept(unordered_segment_table::is_noexcept_assignment) { + if (this != &other) { + clear(); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_max_load_factor = std::move(other.my_max_load_factor); + my_hash_compare = std::move(other.my_hash_compare); + my_segments = std::move(other.my_segments); + + using pocma_type = typename allocator_traits_type::propagate_on_container_move_assignment; + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); + } + return *this; + } + + concurrent_unordered_base& operator=( std::initializer_list<value_type> init ) { + clear(); + insert(init); + return *this; + } + + void swap( concurrent_unordered_base& other ) noexcept(unordered_segment_table::is_noexcept_swap) { + if (this != &other) { + using pocs_type = typename allocator_traits_type::propagate_on_container_swap; + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); + } + } + + allocator_type get_allocator() const noexcept { return my_segments.get_allocator(); } + + iterator begin() noexcept { return iterator(first_value_node(&my_head)); } + const_iterator begin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } + const_iterator cbegin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } + + iterator end() noexcept { return iterator(nullptr); } + const_iterator end() const noexcept { return const_iterator(nullptr); } + const_iterator cend() const noexcept { return const_iterator(nullptr); } + + __TBB_nodiscard bool empty() const noexcept { return size() == 0; } + size_type size() const noexcept { return my_size.load(std::memory_order_relaxed); } + size_type max_size() const noexcept { return allocator_traits_type::max_size(get_allocator()); } + + void clear() noexcept { + internal_clear(); + } + + std::pair<iterator, bool> insert( const value_type& value ) { + return internal_insert_value(value); + } + + std::pair<iterator, bool> insert( value_type&& value ) { + return internal_insert_value(std::move(value)); + } + + iterator insert( const_iterator, const value_type& value ) { + // Ignore hint + return insert(value).first; + } + + iterator insert( const_iterator, value_type&& value ) { + // Ignore hint + return insert(std::move(value)).first; + } + + template <typename InputIterator> + void insert( InputIterator first, InputIterator last ) { + for (; first != last; ++first) { + insert(*first); + } + } + + void insert( std::initializer_list<value_type> init ) { + insert(init.begin(), init.end()); + } + + std::pair<iterator, bool> insert( node_type&& nh ) { + if (!nh.empty()) { + value_node_ptr insert_node = node_handle_accessor::get_node_ptr(nh); + auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { + insert_node->init(order_key); + return insert_node; + }; + auto insert_result = internal_insert(insert_node->value(), init_node); + if (insert_result.inserted) { + // If the insertion succeeded - set node handle to the empty state + __TBB_ASSERT(insert_result.remaining_node == nullptr, + "internal_insert_node should not return the 
remaining node if the insertion succeeded"); + node_handle_accessor::deactivate(nh); + } + return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; + } + return {end(), false}; + } + + iterator insert( const_iterator, node_type&& nh ) { + // Ignore hint + return insert(std::move(nh)).first; + } + + template <typename... Args> + std::pair<iterator, bool> emplace( Args&&... args ) { + // Create a node with temporary order_key 0, which will be reinitialize + // in internal_insert after the hash calculation + value_node_ptr insert_node = create_node(0, std::forward<Args>(args)...); + + auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { + insert_node->init(order_key); + return insert_node; + }; + + auto insert_result = internal_insert(insert_node->value(), init_node); + + if (!insert_result.inserted) { + // If the insertion failed - destroy the node which was created + insert_node->init(split_order_key_regular(1)); + destroy_node(insert_node); + } + + return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; + } + + template <typename... Args> + iterator emplace_hint( const_iterator, Args&&... args ) { + // Ignore hint + return emplace(std::forward<Args>(args)...).first; + } + + iterator unsafe_erase( const_iterator pos ) { + return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); + } + + iterator unsafe_erase( iterator pos ) { + return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); + } + + iterator unsafe_erase( const_iterator first, const_iterator last ) { + while(first != last) { + first = unsafe_erase(first); + } + return iterator(first.get_node_ptr()); + } + + size_type unsafe_erase( const key_type& key ) { + return internal_erase_by_key(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + size_type>::type unsafe_erase( const K& key ) + { + return internal_erase_by_key(key); + } + + node_type unsafe_extract( const_iterator pos ) { + internal_extract(pos.get_node_ptr()); + return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); + } + + node_type unsafe_extract( iterator pos ) { + internal_extract(pos.get_node_ptr()); + return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); + } + + node_type unsafe_extract( const key_type& key ) { + iterator item = find(key); + return item == end() ? node_type() : unsafe_extract(item); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + node_type>::type unsafe_extract( const K& key ) + { + iterator item = find(key); + return item == end() ? node_type() : unsafe_extract(item); + } + + // Lookup functions + iterator find( const key_type& key ) { + value_node_ptr result = internal_find(key); + return result == nullptr ? end() : iterator(result); + } + + const_iterator find( const key_type& key ) const { + value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); + return result == nullptr ? end() : const_iterator(result); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { + value_node_ptr result = internal_find(key); + return result == nullptr ? 
end() : iterator(result); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { + value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); + return result == nullptr ? end() : const_iterator(result); + } + + std::pair<iterator, iterator> equal_range( const key_type& key ) { + auto result = internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + std::pair<const_iterator, const_iterator> equal_range( const key_type& key ) const { + auto result = const_cast<self_type*>(this)->internal_equal_range(key); + return std::make_pair(const_iterator(result.first), const_iterator(result.second)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { + auto result = internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { + auto result = const_cast<self_type*>(this)->internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + size_type count( const key_type& key ) const { + return internal_count(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { + return internal_count(key); + } + + bool contains( const key_type& key ) const { + return find(key) != end(); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { + return find(key) != end(); + } + + // Bucket interface + local_iterator unsafe_begin( size_type n ) { + return local_iterator(first_value_node(get_bucket(n))); + } + + const_local_iterator unsafe_begin( size_type n ) const { + auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); + return const_local_iterator(bucket_begin); + } + + const_local_iterator unsafe_cbegin( size_type n ) const { + auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); + return const_local_iterator(bucket_begin); + } + + local_iterator unsafe_end( size_type n ) { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : local_iterator(nullptr); + } + + const_local_iterator unsafe_end( size_type n ) const { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); + } + + const_local_iterator unsafe_cend( size_type n ) const { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? 
unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); + } + + size_type unsafe_bucket_count() const { return my_bucket_count.load(std::memory_order_relaxed); } + + size_type unsafe_max_bucket_count() const { + return max_size(); + } + + size_type unsafe_bucket_size( size_type n ) const { + return size_type(std::distance(unsafe_begin(n), unsafe_end(n))); + } + + size_type unsafe_bucket( const key_type& key ) const { + return my_hash_compare(key) % my_bucket_count.load(std::memory_order_relaxed); + } + + // Hash policy + float load_factor() const { + return float(size() / float(my_bucket_count.load(std::memory_order_acquire))); + } + + float max_load_factor() const { return my_max_load_factor; } + + void max_load_factor( float mlf ) { + if (mlf != mlf || mlf < 0) { + tbb::detail::throw_exception(exception_id::invalid_load_factor); + } + my_max_load_factor = mlf; + } // TODO: unsafe? + + void rehash( size_type bucket_count ) { + size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); + if (current_bucket_count < bucket_count) { + // TODO: do we need do-while here? + my_bucket_count.compare_exchange_strong(current_bucket_count, round_up_to_power_of_two(bucket_count)); + } + } + + void reserve( size_type elements_count ) { + size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); + size_type necessary_bucket_count = current_bucket_count; + + do { + // TODO: Log2 seems useful here + while (necessary_bucket_count * max_load_factor() < elements_count) { + necessary_bucket_count <<= 1; + } + } while (current_bucket_count >= necessary_bucket_count || + !my_bucket_count.compare_exchange_strong(current_bucket_count, necessary_bucket_count)); + } + + // Observers + hasher hash_function() const { return my_hash_compare.hash_function(); } + key_equal key_eq() const { return my_hash_compare.key_eq(); } + + class const_range_type { + private: + const concurrent_unordered_base& my_instance; + node_ptr my_begin_node; // may be node* const + node_ptr my_end_node; + mutable node_ptr my_midpoint_node; + public: + using size_type = typename concurrent_unordered_base::size_type; + using value_type = typename concurrent_unordered_base::value_type; + using reference = typename concurrent_unordered_base::reference; + using difference_type = typename concurrent_unordered_base::difference_type; + using iterator = typename concurrent_unordered_base::const_iterator; + + bool empty() const { return my_begin_node == my_end_node; } + + bool is_divisible() const { + return my_midpoint_node != my_end_node; + } + + size_type grainsize() const { return 1; } + + const_range_type( const_range_type& range, split ) + : my_instance(range.my_instance), + my_begin_node(range.my_midpoint_node), + my_end_node(range.my_end_node) + { + range.my_end_node = my_begin_node; + __TBB_ASSERT(!empty(), "Splitting despite the range is not divisible"); + __TBB_ASSERT(!range.empty(), "Splitting despite the range is not divisible"); + set_midpoint(); + range.set_midpoint(); + } + + iterator begin() const { return iterator(my_instance.first_value_node(my_begin_node)); } + iterator end() const { return iterator(my_instance.first_value_node(my_end_node)); } + + const_range_type( const concurrent_unordered_base& table ) + : my_instance(table), my_begin_node(const_cast<node_ptr>(&table.my_head)), my_end_node(nullptr) + { + set_midpoint(); + } + private: + void set_midpoint() const { + if (my_begin_node == my_end_node) { + my_midpoint_node = my_end_node; + } else { + sokey_type 
invalid_key = ~sokey_type(0); + sokey_type begin_key = my_begin_node != nullptr ? my_begin_node->order_key() : invalid_key; + sokey_type end_key = my_end_node != nullptr ? my_end_node->order_key() : invalid_key; + + size_type mid_bucket = reverse_bits(begin_key + (end_key - begin_key) / 2) % + my_instance.my_bucket_count.load(std::memory_order_relaxed); + while( my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed) == nullptr) { + mid_bucket = my_instance.get_parent(mid_bucket); + } + if (reverse_bits(mid_bucket) > begin_key) { + // Found a dummy node between begin and end + my_midpoint_node = my_instance.first_value_node( + my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed)); + } else { + // Didn't find a dummy node between begin and end + my_midpoint_node = my_end_node; + } + } + } + }; // class const_range_type + + class range_type : public const_range_type { + public: + using iterator = typename concurrent_unordered_base::iterator; + using const_range_type::const_range_type; + + iterator begin() const { return iterator(const_range_type::begin().get_node_ptr()); } + iterator end() const { return iterator(const_range_type::end().get_node_ptr()); } + }; // class range_type + + // Parallel iteration + range_type range() { + return range_type(*this); + } + + const_range_type range() const { + return const_range_type(*this); + } +protected: + static constexpr bool allow_multimapping = traits_type::allow_multimapping; + +private: + static constexpr size_type initial_bucket_count = 8; + static constexpr float initial_max_load_factor = 4; // TODO: consider 1? + static constexpr size_type pointers_per_embedded_table = sizeof(size_type) * 8 - 1; + + class unordered_segment_table + : public segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table> + { + using self_type = unordered_segment_table; + using atomic_node_ptr = std::atomic<node_ptr>; + using base_type = segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table>; + using segment_type = typename base_type::segment_type; + using base_allocator_type = typename base_type::allocator_type; + + using segment_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_node_ptr>; + using segment_allocator_traits = tbb::detail::allocator_traits<segment_allocator_type>; + public: + // Segment table for unordered containers should not be extended in the wait- free implementation + static constexpr bool allow_table_extending = false; + static constexpr bool is_noexcept_assignment = std::is_nothrow_move_assignable<hasher>::value && + std::is_nothrow_move_assignable<key_equal>::value && + segment_allocator_traits::is_always_equal::value; + static constexpr bool is_noexcept_swap = tbb::detail::is_nothrow_swappable<hasher>::value && + tbb::detail::is_nothrow_swappable<key_equal>::value && + segment_allocator_traits::is_always_equal::value; + + // TODO: using base_type::base_type is not compiling on Windows and Intel Compiler - investigate + unordered_segment_table( const base_allocator_type& alloc = base_allocator_type() ) + : base_type(alloc) {} + + unordered_segment_table( const unordered_segment_table& ) = default; + + unordered_segment_table( const unordered_segment_table& other, const base_allocator_type& alloc ) + : base_type(other, alloc) {} + + unordered_segment_table( unordered_segment_table&& ) = default; + + unordered_segment_table( unordered_segment_table&& other, const base_allocator_type& alloc ) + 
: base_type(std::move(other), alloc) {} + + unordered_segment_table& operator=( const unordered_segment_table& ) = default; + + unordered_segment_table& operator=( unordered_segment_table&& ) = default; + + segment_type create_segment( typename base_type::segment_table_type, typename base_type::segment_index_type segment_index, size_type ) { + segment_allocator_type alloc(this->get_allocator()); + size_type seg_size = this->segment_size(segment_index); + segment_type new_segment = segment_allocator_traits::allocate(alloc, seg_size); + for (size_type i = 0; i != seg_size; ++i) { + segment_allocator_traits::construct(alloc, new_segment + i, nullptr); + } + return new_segment; + } + + // deallocate_segment is required by the segment_table base class, but + // in unordered, it is also necessary to call the destructor during deallocation + void deallocate_segment( segment_type address, size_type index ) { + destroy_segment(address, index); + } + + void destroy_segment( segment_type address, size_type index ) { + segment_allocator_type alloc(this->get_allocator()); + for (size_type i = 0; i != this->segment_size(index); ++i) { + segment_allocator_traits::destroy(alloc, address + i); + } + segment_allocator_traits::deallocate(alloc, address, this->segment_size(index)); + } + + + void copy_segment( size_type index, segment_type, segment_type to ) { + if (index == 0) { + // The first element in the first segment is embedded into the table (my_head) + // so the first pointer should not be stored here + // It would be stored during move ctor/assignment operation + to[1].store(nullptr, std::memory_order_relaxed); + } else { + for (size_type i = 0; i != this->segment_size(index); ++i) { + to[i].store(nullptr, std::memory_order_relaxed); + } + } + } + + void move_segment( size_type index, segment_type from, segment_type to ) { + if (index == 0) { + // The first element in the first segment is embedded into the table (my_head) + // so the first pointer should not be stored here + // It would be stored during move ctor/assignment operation + to[1].store(from[1].load(std::memory_order_relaxed), std::memory_order_relaxed); + } else { + for (size_type i = 0; i != this->segment_size(index); ++i) { + to[i].store(from[i].load(std::memory_order_relaxed), std::memory_order_relaxed); + from[i].store(nullptr, std::memory_order_relaxed); + } + } + } + + // allocate_long_table is required by the segment_table base class, but unused for unordered containers + typename base_type::segment_table_type allocate_long_table( const typename base_type::atomic_segment*, size_type ) { + __TBB_ASSERT(false, "This method should never been called"); + // TableType is a pointer + return nullptr; + } + + // destroy_elements is required by the segment_table base class, but unused for unordered containers + // this function call but do nothing + void destroy_elements() {} + }; // struct unordered_segment_table + + void internal_clear() { + // TODO: consider usefulness of two versions of clear() - with dummy nodes deallocation and without it + node_ptr next = my_head.next(); + node_ptr curr = next; + + my_head.set_next(nullptr); + + while (curr != nullptr) { + next = curr->next(); + destroy_node(curr); + curr = next; + } + + my_size.store(0, std::memory_order_relaxed); + my_segments.clear(); + } + + void destroy_node( node_ptr node ) { + if (node->is_dummy()) { + node_allocator_type dummy_node_allocator(my_segments.get_allocator()); + // Destroy the node + node_allocator_traits::destroy(dummy_node_allocator, node); + // Deallocate the 
memory + node_allocator_traits::deallocate(dummy_node_allocator, node, 1); + } else { + value_node_ptr val_node = static_cast<value_node_ptr>(node); + value_node_allocator_type value_node_allocator(my_segments.get_allocator()); + // Destroy the value + value_node_allocator_traits::destroy(value_node_allocator, val_node->storage()); + // Destroy the node + value_node_allocator_traits::destroy(value_node_allocator, val_node); + // Deallocate the memory + value_node_allocator_traits::deallocate(value_node_allocator, val_node, 1); + } + } + + struct internal_insert_return_type { + // If the insertion failed - the remaining_node points to the node, which was failed to insert + // This node can be allocated in process of insertion + value_node_ptr remaining_node; + // If the insertion failed - node_with_equal_key points to the node in the list with the + // key, equivalent to the inserted, otherwise it points to the node, which was inserted. + value_node_ptr node_with_equal_key; + // Insertion status + // NOTE: if it is true - remaining_node should be nullptr + bool inserted; + }; // struct internal_insert_return_type + + // Inserts the value into the split ordered list + template <typename ValueType> + std::pair<iterator, bool> internal_insert_value( ValueType&& value ) { + + auto create_value_node = [&value, this]( sokey_type order_key )->value_node_ptr { + return create_node(order_key, std::forward<ValueType>(value)); + }; + + auto insert_result = internal_insert(value, create_value_node); + + if (insert_result.remaining_node != nullptr) { + // If the insertion fails - destroy the node which was failed to insert if it exist + __TBB_ASSERT(!insert_result.inserted, + "remaining_node should be nullptr if the node was successfully inserted"); + destroy_node(insert_result.remaining_node); + } + + return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; + } + + // Inserts the node into the split ordered list + // Creates a node using the specified callback after the place for insertion was found + // Returns internal_insert_return_type object, where: + // - If the insertion succeeded: + // - remaining_node is nullptr + // - node_with_equal_key point to the inserted node + // - inserted is true + // - If the insertion failed: + // - remaining_node points to the node, that was failed to insert if it was created. 
+ // nullptr if the node was not created, because the requested key was already + // presented in the list + // - node_with_equal_key point to the element in the list with the key, equivalent to + // to the requested key + // - inserted is false + template <typename ValueType, typename CreateInsertNode> + internal_insert_return_type internal_insert( ValueType&& value, CreateInsertNode create_insert_node ) { + static_assert(std::is_same<typename std::decay<ValueType>::type, value_type>::value, + "Incorrect type in internal_insert"); + const key_type& key = traits_type::get_key(value); + sokey_type hash_key = sokey_type(my_hash_compare(key)); + + sokey_type order_key = split_order_key_regular(hash_key); + node_ptr prev = prepare_bucket(hash_key); + __TBB_ASSERT(prev != nullptr, "Invalid head node"); + + auto search_result = search_after(prev, order_key, key); + + if (search_result.second) { + return internal_insert_return_type{ nullptr, search_result.first, false }; + } + + value_node_ptr new_node = create_insert_node(order_key); + node_ptr curr = search_result.first; + + while (!try_insert(prev, new_node, curr)) { + search_result = search_after(prev, order_key, key); + if (search_result.second) { + return internal_insert_return_type{ new_node, search_result.first, false }; + } + curr = search_result.first; + } + + auto sz = my_size.fetch_add(1); + adjust_table_size(sz + 1, my_bucket_count.load(std::memory_order_acquire)); + return internal_insert_return_type{ nullptr, static_cast<value_node_ptr>(new_node), true }; + } + + // Searches the node with the key, equivalent to key with requested order key after the node prev + // Returns the existing node and true if the node is already in the list + // Returns the first node with the order key, greater than requested and false if the node is not presented in the list + std::pair<value_node_ptr, bool> search_after( node_ptr& prev, sokey_type order_key, const key_type& key ) { + // NOTE: static_cast<value_node_ptr>(curr) should be done only after we would ensure + // that the node is not a dummy node + + node_ptr curr = prev->next(); + + while (curr != nullptr && (curr->order_key() < order_key || + (curr->order_key() == order_key && !my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)))) + { + prev = curr; + curr = curr->next(); + } + + if (curr != nullptr && curr->order_key() == order_key && !allow_multimapping) { + return { static_cast<value_node_ptr>(curr), true }; + } + return { static_cast<value_node_ptr>(curr), false }; + } + + void adjust_table_size( size_type total_elements, size_type current_size ) { + // Grow the table by a factor of 2 if possible and needed + if ( (float(total_elements) / float(current_size)) > my_max_load_factor ) { + // Double the size of the hash only if size hash not changed in between loads + my_bucket_count.compare_exchange_strong(current_size, 2u * current_size); + } + } + + node_ptr insert_dummy_node( node_ptr parent_dummy_node, sokey_type order_key ) { + node_ptr prev_node = parent_dummy_node; + + node_ptr dummy_node = create_dummy_node(order_key); + node_ptr next_node; + + do { + next_node = prev_node->next(); + // Move forward through the list while the order key is less than requested + while (next_node != nullptr && next_node->order_key() < order_key) { + prev_node = next_node; + next_node = next_node->next(); + } + + if (next_node != nullptr && next_node->order_key() == order_key) { + // Another dummy node with the same order key was inserted by another thread + // Destroy 
the node and exit + destroy_node(dummy_node); + return next_node; + } + } while (!try_insert(prev_node, dummy_node, next_node)); + + return dummy_node; + } + + // Try to insert a node between prev_node and expected next + // If the next is not equal to expected next - return false + static bool try_insert( node_ptr prev_node, node_ptr new_node, node_ptr current_next_node ) { + new_node->set_next(current_next_node); + return prev_node->try_set_next(current_next_node, new_node); + } + + // Returns the bucket, associated with the hash_key + node_ptr prepare_bucket( sokey_type hash_key ) { + size_type bucket = hash_key % my_bucket_count.load(std::memory_order_acquire); + return get_bucket(bucket); + } + + // Initialize the corresponding bucket if it is not initialized + node_ptr get_bucket( size_type bucket_index ) { + if (my_segments[bucket_index].load(std::memory_order_acquire) == nullptr) { + init_bucket(bucket_index); + } + return my_segments[bucket_index].load(std::memory_order_acquire); + } + + void init_bucket( size_type bucket ) { + if (bucket == 0) { + // Atomicaly store the first bucket into my_head + node_ptr disabled = nullptr; + my_segments[0].compare_exchange_strong(disabled, &my_head); + return; + } + + size_type parent_bucket = get_parent(bucket); + + while (my_segments[parent_bucket].load(std::memory_order_acquire) == nullptr) { + // Initialize all of the parent buckets + init_bucket(parent_bucket); + } + + __TBB_ASSERT(my_segments[parent_bucket].load(std::memory_order_acquire) != nullptr, "Parent bucket should be initialized"); + node_ptr parent = my_segments[parent_bucket].load(std::memory_order_acquire); + + // Insert dummy node into the list + node_ptr dummy_node = insert_dummy_node(parent, split_order_key_dummy(bucket)); + // TODO: consider returning pair<node_ptr, bool> to avoid store operation if the bucket was stored by an other thread + // or move store to insert_dummy_node + // Add dummy_node into the segment table + my_segments[bucket].store(dummy_node, std::memory_order_release); + } + + node_ptr create_dummy_node( sokey_type order_key ) { + node_allocator_type dummy_node_allocator(my_segments.get_allocator()); + node_ptr dummy_node = node_allocator_traits::allocate(dummy_node_allocator, 1); + node_allocator_traits::construct(dummy_node_allocator, dummy_node, order_key); + return dummy_node; + } + + template <typename... Args> + value_node_ptr create_node( sokey_type order_key, Args&&... 
args ) { + value_node_allocator_type value_node_allocator(my_segments.get_allocator()); + // Allocate memory for the value_node + value_node_ptr new_node = value_node_allocator_traits::allocate(value_node_allocator, 1); + // Construct the node + value_node_allocator_traits::construct(value_node_allocator, new_node, order_key); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + value_node_allocator_traits::destroy(value_node_allocator, new_node); + value_node_allocator_traits::deallocate(value_node_allocator, new_node, 1); + }); + + // Construct the value in the node + value_node_allocator_traits::construct(value_node_allocator, new_node->storage(), std::forward<Args>(args)...); + value_guard.dismiss(); + return new_node; + } + + value_node_ptr first_value_node( node_ptr first_node ) const { + while (first_node != nullptr && first_node->is_dummy()) { + first_node = first_node->next(); + } + return static_cast<value_node_ptr>(first_node); + } + + // Unsafe method, which removes the node from the list and returns the next node + node_ptr internal_erase( value_node_ptr node_to_erase ) { + __TBB_ASSERT(node_to_erase != nullptr, "Invalid iterator for erase"); + node_ptr next_node = node_to_erase->next(); + internal_extract(node_to_erase); + destroy_node(node_to_erase); + return next_node; + } + + template <typename K> + size_type internal_erase_by_key( const K& key ) { + // TODO: consider reimplementation without equal_range - it is not effective to perform lookup over a bucket + // for each unsafe_erase call + auto eq_range = equal_range(key); + size_type erased_count = 0; + + for (auto it = eq_range.first; it != eq_range.second;) { + it = unsafe_erase(it); + ++erased_count; + } + return erased_count; + } + + // Unsafe method, which extracts the node from the list + void internal_extract( value_node_ptr node_to_extract ) { + const key_type& key = traits_type::get_key(node_to_extract->value()); + sokey_type hash_key = sokey_type(my_hash_compare(key)); + + node_ptr prev_node = prepare_bucket(hash_key); + + for (node_ptr node = prev_node->next(); node != nullptr; prev_node = node, node = node->next()) { + if (node == node_to_extract) { + unlink_node(prev_node, node, node_to_extract->next()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + return; + } + __TBB_ASSERT(node->order_key() <= node_to_extract->order_key(), + "node, which is going to be extracted should be presented in the list"); + } + } + +protected: + template <typename SourceType> + void internal_merge( SourceType&& source ) { + static_assert(std::is_same<node_type, typename std::decay<SourceType>::type::node_type>::value, + "Incompatible containers cannot be merged"); + + for (node_ptr source_prev = &source.my_head; source_prev->next() != nullptr;) { + if (!source_prev->next()->is_dummy()) { + value_node_ptr curr = static_cast<value_node_ptr>(source_prev->next()); + // If the multimapping is allowed, or the key is not presented + // in the *this container - extract the node from the list + if (allow_multimapping || !contains(traits_type::get_key(curr->value()))) { + node_ptr next_node = curr->next(); + source.unlink_node(source_prev, curr, next_node); + + // Remember the old order key + sokey_type old_order_key = curr->order_key(); + + // Node handle with curr cannot be used directly in insert call, because + // the destructor of node_type will destroy curr + node_type curr_node = 
node_handle_accessor::construct<node_type>(curr); + + // If the insertion fails - return ownership of the node to the source + if (!insert(std::move(curr_node)).second) { + __TBB_ASSERT(!allow_multimapping, "Insertion should succeed for multicontainer"); + __TBB_ASSERT(source_prev->next() == next_node, + "Concurrent operations with the source container in merge are prohibited"); + + // Initialize the node with the old order key, because the order key + // can change during the insertion + curr->init(old_order_key); + __TBB_ASSERT(old_order_key >= source_prev->order_key() && + (next_node == nullptr || old_order_key <= next_node->order_key()), + "Wrong nodes order in the source container"); + // Merge is unsafe for source container, so the insertion back can be done without compare_exchange + curr->set_next(next_node); + source_prev->set_next(curr); + source_prev = curr; + node_handle_accessor::deactivate(curr_node); + } else { + source.my_size.fetch_sub(1, std::memory_order_relaxed); + } + } else { + source_prev = curr; + } + } else { + source_prev = source_prev->next(); + } + } + } + +private: + // Unsafe method, which unlinks the node between prev and next + void unlink_node( node_ptr prev_node, node_ptr node_to_unlink, node_ptr next_node ) { + __TBB_ASSERT(prev_node->next() == node_to_unlink && + node_to_unlink->next() == next_node, + "erasing and extracting nodes from the containers are unsafe in concurrent mode"); + prev_node->set_next(next_node); + node_to_unlink->set_next(nullptr); + } + + template <typename K> + value_node_ptr internal_find( const K& key ) { + sokey_type hash_key = sokey_type(my_hash_compare(key)); + sokey_type order_key = split_order_key_regular(hash_key); + + node_ptr curr = prepare_bucket(hash_key); + + while (curr != nullptr) { + if (curr->order_key() > order_key) { + // If the order key is greater than the requested order key, + // the element is not in the hash table + return nullptr; + } else if (curr->order_key() == order_key && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { + // The fact that order keys match does not mean that the element is found. + // Key function comparison has to be performed to check whether this is the + // right element. If not, keep searching while order key is the same. 
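// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// Background for the comment above: a value node's order key is its hash with the
// bits reversed and the lowest bit set, while a dummy (bucket) node uses a reversed
// bucket index with the lowest bit cleared (see split_order_key_regular and
// split_order_key_dummy further down in this hunk). Two distinct keys whose hashes
// collide therefore share an order key, which is why key_equal is still consulted.
// Standalone sketch, using a plain loop in place of TBB's reverse_bits helper:
#include <cstddef>

inline std::size_t reverse_bits_sketch( std::size_t x ) {
    std::size_t r = 0;
    for (unsigned i = 0; i < sizeof(x) * 8; ++i) {
        r = (r << 1) | (x & 1);   // shift the lowest bit of x into r, highest first
        x >>= 1;
    }
    return r;
}

inline std::size_t order_key_regular( std::size_t hash )  { return reverse_bits_sketch(hash) | 0x1; }                  // value node
inline std::size_t order_key_dummy( std::size_t bucket )  { return reverse_bits_sketch(bucket) & ~std::size_t(0x1); }  // bucket (dummy) node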
+ return static_cast<value_node_ptr>(curr); + } + curr = curr->next(); + } + + return nullptr; + } + + template <typename K> + std::pair<value_node_ptr, value_node_ptr> internal_equal_range( const K& key ) { + sokey_type hash_key = sokey_type(my_hash_compare(key)); + sokey_type order_key = split_order_key_regular(hash_key); + + node_ptr curr = prepare_bucket(hash_key); + + while (curr != nullptr) { + if (curr->order_key() > order_key) { + // If the order key is greater than the requested order key, + // the element is not in the hash table + return std::make_pair(nullptr, nullptr); + } else if (curr->order_key() == order_key && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { + value_node_ptr first = static_cast<value_node_ptr>(curr); + node_ptr last = first; + do { + last = last->next(); + } while (allow_multimapping && last != nullptr && !last->is_dummy() && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(last)->value()), key)); + return std::make_pair(first, first_value_node(last)); + } + curr = curr->next(); + } + return {nullptr, nullptr}; + } + + template <typename K> + size_type internal_count( const K& key ) const { + if (allow_multimapping) { + // TODO: consider reimplementing the internal_equal_range with elements counting to avoid std::distance + auto eq_range = equal_range(key); + return std::distance(eq_range.first, eq_range.second); + } else { + return contains(key) ? 1 : 0; + } + } + + void internal_copy( const concurrent_unordered_base& other ) { + node_ptr last_node = &my_head; + my_segments[0].store(&my_head, std::memory_order_relaxed); + + for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { + node_ptr new_node; + if (!node->is_dummy()) { + // The node in the right table contains a value + new_node = create_node(node->order_key(), static_cast<value_node_ptr>(node)->value()); + } else { + // The node in the right table is a dummy node + new_node = create_dummy_node(node->order_key()); + my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); + } + + last_node->set_next(new_node); + last_node = new_node; + } + } + + void internal_move( concurrent_unordered_base&& other ) { + node_ptr last_node = &my_head; + my_segments[0].store(&my_head, std::memory_order_relaxed); + + for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { + node_ptr new_node; + if (!node->is_dummy()) { + // The node in the right table contains a value + new_node = create_node(node->order_key(), std::move(static_cast<value_node_ptr>(node)->value())); + } else { + // TODO: do we need to destroy a dummy node in the right container? 
+ // The node in the right table is a dummy_node + new_node = create_dummy_node(node->order_key()); + my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); + } + + last_node->set_next(new_node); + last_node = new_node; + } + } + + void move_content( concurrent_unordered_base&& other ) { + // NOTE: allocators should be equal + my_head.set_next(other.my_head.next()); + other.my_head.set_next(nullptr); + my_segments[0].store(&my_head, std::memory_order_relaxed); + + other.my_bucket_count.store(initial_bucket_count, std::memory_order_relaxed); + other.my_max_load_factor = initial_max_load_factor; + other.my_size.store(0, std::memory_order_relaxed); + } + + void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type&, + /*is_always_equal = */std::true_type ) { + // Allocators are always equal - no need to compare for equality + move_content(std::move(other)); + } + + void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type& alloc, + /*is_always_equal = */std::false_type ) { + // Allocators are not always equal + if (alloc == other.my_segments.get_allocator()) { + move_content(std::move(other)); + } else { + try_call( [&] { + internal_move(std::move(other)); + } ).on_exception( [&] { + clear(); + }); + } + } + + // Move assigns the hash table to other is any instances of allocator_type are always equal + // or propagate_on_container_move_assignment is true + void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::true_type ) { + move_content(std::move(other)); + } + + // Move assigns the hash table to other is any instances of allocator_type are not always equal + // and propagate_on_container_move_assignment is false + void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::false_type ) { + if (my_segments.get_allocator() == other.my_segments.get_allocator()) { + move_content(std::move(other)); + } else { + // TODO: guards for exceptions + internal_move(std::move(other)); + } + } + + void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::true_type ) { + internal_swap_fields(other); + } + + void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::false_type ) { + __TBB_ASSERT(my_segments.get_allocator() == other.my_segments.get_allocator(), + "Swapping with unequal allocators is not allowed"); + internal_swap_fields(other); + } + + void internal_swap_fields( concurrent_unordered_base& other ) { + node_ptr first_node = my_head.next(); + my_head.set_next(other.my_head.next()); + other.my_head.set_next(first_node); + + size_type current_size = my_size.load(std::memory_order_relaxed); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(current_size, std::memory_order_relaxed); + + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_bucket_count.store(bucket_count, std::memory_order_relaxed); + + using std::swap; + swap(my_max_load_factor, other.my_max_load_factor); + swap(my_hash_compare, other.my_hash_compare); + my_segments.swap(other.my_segments); + + // swap() method from segment table swaps all of the segments including the first segment + // We should restore it to my_head. 
Without it the first segment of the container will point + // to other.my_head. + my_segments[0].store(&my_head, std::memory_order_relaxed); + other.my_segments[0].store(&other.my_head, std::memory_order_relaxed); + } + + // A regular order key has its original hash value reversed and the last bit set + static constexpr sokey_type split_order_key_regular( sokey_type hash ) { + return reverse_bits(hash) | 0x1; + } + + // A dummy order key has its original hash value reversed and the last bit unset + static constexpr sokey_type split_order_key_dummy( sokey_type hash ) { + return reverse_bits(hash) & ~sokey_type(0x1); + } + + size_type get_parent( size_type bucket ) const { + // Unset bucket's most significant turned-on bit + __TBB_ASSERT(bucket != 0, "Unable to get_parent of the bucket 0"); + size_type msb = tbb::detail::log2(bucket); + return bucket & ~(size_type(1) << msb); + } + + size_type get_next_bucket_index( size_type bucket ) const { + size_type bits = tbb::detail::log2(my_bucket_count.load(std::memory_order_relaxed)); + size_type reversed_next = reverse_n_bits(bucket, bits) + 1; + return reverse_n_bits(reversed_next, bits); + } + + std::atomic<size_type> my_size; + std::atomic<size_type> my_bucket_count; + float my_max_load_factor; + hash_compare_type my_hash_compare; + + list_node_type my_head; // Head node for split ordered list + unordered_segment_table my_segments; // Segment table of pointers to nodes + + template <typename Container, typename Value> + friend class solist_iterator; + + template <typename OtherTraits> + friend class concurrent_unordered_base; +}; // class concurrent_unordered_base + +template <typename Traits> +bool operator==( const concurrent_unordered_base<Traits>& lhs, + const concurrent_unordered_base<Traits>& rhs ) { + if (&lhs == &rhs) { return true; } + if (lhs.size() != rhs.size()) { return false; } + +#if _MSC_VER + // Passing "unchecked" iterators to std::permutation with 3 parameters + // causes compiler warnings. + // The workaround is to use overload with 4 parameters, which is + // available since C++14 - minimally supported version on MSVC + return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +#else + return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin()); +#endif +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Traits> +bool operator!=( const concurrent_unordered_base<Traits>& lhs, + const concurrent_unordered_base<Traits>& rhs ) { + return !(lhs == rhs); +} +#endif + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__concurrent_unordered_base_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h index 251ebb8d82..1f9b0fff13 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h @@ -1,483 +1,483 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
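// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// get_parent() above clears the most significant set bit of a bucket index, so
// every bucket chains toward bucket 0 (e.g. 13 -> 5 -> 1 -> 0); init_bucket()
// earlier in the hunk walks that chain to lazily create parent dummy nodes.
// Standalone sketch, with a simple loop in place of tbb::detail::log2:
#include <cstddef>

inline std::size_t floor_log2_sketch( std::size_t x ) {
    std::size_t r = 0;
    while (x >>= 1) ++r;
    return r;
}

inline std::size_t parent_bucket_sketch( std::size_t bucket ) {
    // precondition: bucket != 0, matching the assertion in get_parent()
    return bucket & ~(std::size_t(1) << floor_log2_sketch(bucket));
}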
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__config_H -#define __TBB_detail__config_H - -/** This header is supposed to contain macro definitions only. - The macros defined here are intended to control such aspects of TBB build as - - presence of compiler features - - compilation modes - - feature sets - - known compiler/platform issues -**/ - -/* Check which standard library we use. */ -#include <cstddef> - -#if _MSC_VER - #define __TBB_EXPORTED_FUNC __cdecl - #define __TBB_EXPORTED_METHOD __thiscall -#else - #define __TBB_EXPORTED_FUNC - #define __TBB_EXPORTED_METHOD -#endif - -#if defined(_MSVC_LANG) - #define __TBB_LANG _MSVC_LANG -#else - #define __TBB_LANG __cplusplus -#endif // _MSVC_LANG - -#define __TBB_CPP14_PRESENT (__TBB_LANG >= 201402L) -#define __TBB_CPP17_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP20_PRESENT (__TBB_LANG >= 201709L) - -#if __INTEL_COMPILER || _MSC_VER - #define __TBB_NOINLINE(decl) __declspec(noinline) decl -#elif __GNUC__ - #define __TBB_NOINLINE(decl) decl __attribute__ ((noinline)) -#else - #define __TBB_NOINLINE(decl) decl -#endif - -#define __TBB_STRING_AUX(x) #x -#define __TBB_STRING(x) __TBB_STRING_AUX(x) - -// Note that when ICC or Clang is in use, __TBB_GCC_VERSION might not fully match -// the actual GCC version on the system. -#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - -/* Check which standard library we use. */ - -// Prior to GCC 7, GNU libstdc++ did not have a convenient version macro. -// Therefore we use different ways to detect its version. -#ifdef TBB_USE_GLIBCXX_VERSION - // The version is explicitly specified in our public TBB_USE_GLIBCXX_VERSION macro. - // Its format should match the __TBB_GCC_VERSION above, e.g. 70301 for libstdc++ coming with GCC 7.3.1. - #define __TBB_GLIBCXX_VERSION TBB_USE_GLIBCXX_VERSION -#elif _GLIBCXX_RELEASE && _GLIBCXX_RELEASE != __GNUC__ - // Reported versions of GCC and libstdc++ do not match; trust the latter - #define __TBB_GLIBCXX_VERSION (_GLIBCXX_RELEASE*10000) -#elif __GLIBCPP__ || __GLIBCXX__ - // The version macro is not defined or matches the GCC version; use __TBB_GCC_VERSION - #define __TBB_GLIBCXX_VERSION __TBB_GCC_VERSION -#endif - -#if __clang__ - // according to clang documentation, version can be vendor specific - #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) -#endif - -/** Macro helpers **/ - -#define __TBB_CONCAT_AUX(A,B) A##B -// The additional level of indirection is needed to expand macros A and B (not to get the AB macro). -// See [cpp.subst] and [cpp.concat] for more details. -#define __TBB_CONCAT(A,B) __TBB_CONCAT_AUX(A,B) -// The IGNORED argument and comma are needed to always have 2 arguments (even when A is empty). -#define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A) -#define __TBB_MACRO_EMPTY 1 - -#if _M_X64 - #define __TBB_W(name) name##64 -#else - #define __TBB_W(name) name -#endif - -/** User controlled TBB features & modes **/ - -#ifndef TBB_USE_DEBUG - /* - There are four cases that are supported: - 1. "_DEBUG is undefined" means "no debug"; - 2. "_DEBUG defined to something that is evaluated to 0" (including "garbage", as per [cpp.cond]) means "no debug"; - 3. "_DEBUG defined to something that is evaluated to a non-zero value" means "debug"; - 4. "_DEBUG defined to nothing (empty)" means "debug". - */ - #ifdef _DEBUG - // Check if _DEBUG is empty. 
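// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// How the emptiness check below works, reduced to a standalone example: macro
// arguments are expanded before substitution, so if DBG is defined to nothing the
// concatenation collapses to EMPTY_TAG, which itself expands to 1; any non-empty
// definition produces a different (undefined) token that evaluates to 0 in #if.
#define CAT_AUX(A, B) A##B
#define EMPTY_TAG 1
#define IS_EMPTY(A, IGNORED) CAT_AUX(EMPTY_TAG, A)

#define DBG                              // defined, but to nothing
#if IS_EMPTY(DBG, IGNORED) == EMPTY_TAG
    #define USE_DEBUG 1                  // taken: DBG is defined and empty
#else
    #define USE_DEBUG DBG
#endif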
- #define __TBB_IS__DEBUG_EMPTY (__TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)==__TBB_MACRO_EMPTY) - #if __TBB_IS__DEBUG_EMPTY - #define TBB_USE_DEBUG 1 - #else - #define TBB_USE_DEBUG _DEBUG - #endif // __TBB_IS__DEBUG_EMPTY - #else - #define TBB_USE_DEBUG 0 - #endif // _DEBUG -#endif // TBB_USE_DEBUG - -#ifndef TBB_USE_ASSERT - #define TBB_USE_ASSERT TBB_USE_DEBUG -#endif // TBB_USE_ASSERT - -#ifndef TBB_USE_PROFILING_TOOLS -#if TBB_USE_DEBUG - #define TBB_USE_PROFILING_TOOLS 2 -#else // TBB_USE_DEBUG - #define TBB_USE_PROFILING_TOOLS 0 -#endif // TBB_USE_DEBUG -#endif // TBB_USE_PROFILING_TOOLS - -// Exceptions support cases -#if !(__EXCEPTIONS || defined(_CPPUNWIND) || __SUNPRO_CC) - #if TBB_USE_EXCEPTIONS - #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. - #elif !defined(TBB_USE_EXCEPTIONS) - #define TBB_USE_EXCEPTIONS 0 - #endif -#elif !defined(TBB_USE_EXCEPTIONS) - #define TBB_USE_EXCEPTIONS 1 -#endif - -/** Preprocessor symbols to determine HW architecture **/ - -#if _WIN32 || _WIN64 - #if defined(_M_X64) || defined(__x86_64__) // the latter for MinGW support - #define __TBB_x86_64 1 - #elif defined(_M_IA64) - #define __TBB_ipf 1 - #elif defined(_M_IX86) || defined(__i386__) // the latter for MinGW support - #define __TBB_x86_32 1 - #else - #define __TBB_generic_arch 1 - #endif -#else /* Assume generic Unix */ - #if __x86_64__ - #define __TBB_x86_64 1 - #elif __ia64__ - #define __TBB_ipf 1 - #elif __i386__||__i386 // __i386 is for Sun OS - #define __TBB_x86_32 1 - #else - #define __TBB_generic_arch 1 - #endif -#endif - -/** Windows API or POSIX API **/ - -#if _WIN32 || _WIN64 - #define __TBB_USE_WINAPI 1 -#else - #define __TBB_USE_POSIX 1 -#endif - -/** Internal TBB features & modes **/ - -/** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/ -#ifndef __TBB_DYNAMIC_LOAD_ENABLED - #define __TBB_DYNAMIC_LOAD_ENABLED 1 -#endif - -/** __TBB_WIN8UI_SUPPORT enables support of Windows* Store Apps and limit a possibility to load - shared libraries at run time only from application container **/ -#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP - #define __TBB_WIN8UI_SUPPORT 1 -#else - #define __TBB_WIN8UI_SUPPORT 0 -#endif - -/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/ -#ifndef __TBB_WEAK_SYMBOLS_PRESENT - #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) -#endif - -/** Presence of compiler features **/ - -#if __clang__ && !__INTEL_COMPILER - #define __TBB_USE_OPTIONAL_RTTI __has_feature(cxx_rtti) -#elif defined(_CPPRTTI) - #define __TBB_USE_OPTIONAL_RTTI 1 -#else - #define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__) -#endif - -/** Library features presence macros **/ - -#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__TBB_LANG >= 201402L) -#define __TBB_CPP17_INVOKE_RESULT_PRESENT (__TBB_LANG >= 201703L) - -// TODO: Remove the condition(__INTEL_COMPILER > 2021) from the __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -// macro when this feature start working correctly on this compiler. 
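// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// The compiler-specific block below gates C++17 deduction guides per toolchain;
// the clang branch relies on the standard feature-test macro. The same pattern in
// standalone form (the MY_* name is made up for the sketch, the threshold mirrors
// the clang branch below):
#ifdef __cpp_deduction_guides
    #define MY_HAS_DEDUCTION_GUIDES (__cpp_deduction_guides >= 201611L)
#else
    #define MY_HAS_DEDUCTION_GUIDES 0
#endif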
-#if __INTEL_COMPILER && (!_MSC_VER || __INTEL_CXX11_MOVE__) - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__INTEL_COMPILER > 2021 && __TBB_LANG >= 201703L) - #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition -#elif __clang__ - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__has_feature(cxx_variable_templates)) - #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition - #ifdef __cpp_deduction_guides - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201611L) - #else - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT 0 - #endif -#elif __GNUC__ - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L && __TBB_GCC_VERSION >= 50000) - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201606L) - #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 201709L && __TBB_GCC_VERSION >= 100201) -#elif _MSC_VER - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (_MSC_FULL_VER >= 190023918 && (!__INTEL_COMPILER || __INTEL_COMPILER >= 1700)) - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (_MSC_VER >= 1914 && __TBB_LANG >= 201703L && (!__INTEL_COMPILER || __INTEL_COMPILER > 2021)) - #define __TBB_CPP20_CONCEPTS_PRESENT (_MSC_VER >= 1923 && __TBB_LANG >= 202002L) // TODO: INTEL_COMPILER? -#else - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__TBB_LANG >= 201703L) - #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 202002L) -#endif - -// GCC4.8 on RHEL7 does not support std::get_new_handler -#define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION) -// GCC4.8 on RHEL7 does not support std::is_trivially_copyable -#define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700 || (__TBB_GLIBCXX_VERSION >= 50000 && __GXX_EXPERIMENTAL_CXX0X__)) - -#define __TBB_CPP17_MEMORY_RESOURCE_PRESENT 0 -#define __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT (_MSC_VER >= 1911) -#define __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP17_IS_SWAPPABLE_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP20_COMPARISONS_PRESENT __TBB_CPP20_PRESENT - -#if (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__APPLE__ && !defined(_musl_)) -#define __TBB_RESUMABLE_TASKS 1 -#else -#define __TBB_RESUMABLE_TASKS 0 -#endif - -/* This macro marks incomplete code or comments describing ideas which are considered for the future. - * See also for plain comment with TODO and FIXME marks for small improvement opportunities. - */ -#define __TBB_TODO 0 - -/* Check which standard library we use. */ -/* __TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed. 
*/ -#if !defined(__TBB_SYMBOL) && !__TBB_CONFIG_PREPROC_ONLY - #include <cstddef> -#endif - -/** Target OS is either iOS* or iOS* simulator **/ -#if __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ - #define __TBB_IOS 1 -#endif - -#if __APPLE__ - #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \ - && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000 - // ICC does not correctly set the macro if -mmacosx-min-version is not specified - #define __TBB_MACOS_TARGET_VERSION (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000)) - #else - #define __TBB_MACOS_TARGET_VERSION __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - #endif -#endif - -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) - #define __TBB_GCC_WARNING_IGNORED_ATTRIBUTES_PRESENT (__TBB_GCC_VERSION >= 60100) -#endif - -#define __TBB_CPP17_FALLTHROUGH_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP17_NODISCARD_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_FALLTHROUGH_PRESENT (__TBB_GCC_VERSION >= 70000 && !__INTEL_COMPILER) - -#if __TBB_CPP17_FALLTHROUGH_PRESENT - #define __TBB_fallthrough [[fallthrough]] -#elif __TBB_FALLTHROUGH_PRESENT - #define __TBB_fallthrough __attribute__ ((fallthrough)) -#else - #define __TBB_fallthrough -#endif - -#if __TBB_CPP17_NODISCARD_PRESENT - #define __TBB_nodiscard [[nodiscard]] -#elif __clang__ || __GNUC__ - #define __TBB_nodiscard __attribute__((warn_unused_result)) -#else - #define __TBB_nodiscard -#endif - -#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \ - || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200) && !__TBB_IOS) - - -#define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || (_MSC_VER>=1700 && !__clang__) || __INTEL_COMPILER>=1300) && !__TBB_DEFINE_MIC && !__ANDROID__) - -#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) && !__ANDROID__) - -/** Internal TBB features & modes **/ - -/** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when - it's necessary to test internal functions not exported from TBB DLLs -**/ -#if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY) - #define __TBB_NO_IMPLICIT_LINKAGE 1 - #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 -#endif - -#if (__TBB_BUILD || __TBBMALLOC_BUILD || __TBBMALLOCPROXY_BUILD || __TBBBIND_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE) - #define __TBB_NO_IMPLICIT_LINKAGE 1 -#endif - -#if _MSC_VER - #if !__TBB_NO_IMPLICIT_LINKAGE - #ifdef _DEBUG - #pragma comment(lib, "tbb12_debug.lib") - #else - #pragma comment(lib, "tbb12.lib") - #endif - #endif -#endif - -#ifndef __TBB_SCHEDULER_OBSERVER - #define __TBB_SCHEDULER_OBSERVER 1 -#endif /* __TBB_SCHEDULER_OBSERVER */ - -#ifndef __TBB_FP_CONTEXT - #define __TBB_FP_CONTEXT 1 -#endif /* __TBB_FP_CONTEXT */ - -#define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official - -#ifndef __TBB_ARENA_OBSERVER - #define __TBB_ARENA_OBSERVER __TBB_SCHEDULER_OBSERVER -#endif /* __TBB_ARENA_OBSERVER */ - -#ifndef __TBB_ARENA_BINDING - #define __TBB_ARENA_BINDING 1 -#endif - -#if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD - #define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1 -#endif - -#if (TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION || __TBB_BUILD) && __TBB_ARENA_BINDING - #define __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT 1 -#endif - -#ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY - #define 
__TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 -#endif - -#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \ - (_WIN32 || _WIN64 || __APPLE__ || (__linux__ && !__ANDROID__)) - #define __TBB_SURVIVE_THREAD_SWITCH 1 -#endif /* __TBB_SURVIVE_THREAD_SWITCH */ - -#ifndef TBB_PREVIEW_FLOW_GRAPH_FEATURES - #define TBB_PREVIEW_FLOW_GRAPH_FEATURES __TBB_CPF_BUILD -#endif - -#ifndef __TBB_DEFAULT_PARTITIONER - #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner -#endif - -#ifndef __TBB_FLOW_TRACE_CODEPTR - #define __TBB_FLOW_TRACE_CODEPTR __TBB_CPF_BUILD -#endif - -// Intel(R) C++ Compiler starts analyzing usages of the deprecated content at the template -// instantiation site, which is too late for suppression of the corresponding messages for internal -// stuff. -#if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) - #if (__TBB_LANG >= 201402L) - #define __TBB_DEPRECATED [[deprecated]] - #define __TBB_DEPRECATED_MSG(msg) [[deprecated(msg)]] - #elif _MSC_VER - #define __TBB_DEPRECATED __declspec(deprecated) - #define __TBB_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) - #elif (__GNUC__ && __TBB_GCC_VERSION >= 40805) || __clang__ - #define __TBB_DEPRECATED __attribute__((deprecated)) - #define __TBB_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) - #endif -#endif // !defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) - -#if !defined(__TBB_DEPRECATED) - #define __TBB_DEPRECATED - #define __TBB_DEPRECATED_MSG(msg) -#elif !defined(__TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES) - // Suppress deprecated messages from self - #define __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES 1 -#endif - -#if defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) && (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) - #define __TBB_DEPRECATED_VERBOSE __TBB_DEPRECATED - #define __TBB_DEPRECATED_VERBOSE_MSG(msg) __TBB_DEPRECATED_MSG(msg) -#else - #define __TBB_DEPRECATED_VERBOSE - #define __TBB_DEPRECATED_VERBOSE_MSG(msg) -#endif // (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) - -#if (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) && !(__TBB_LANG >= 201103L || _MSC_VER >= 1900) - #pragma message("TBB Warning: Support for C++98/03 is deprecated. Please use the compiler that supports C++11 features at least.") -#endif - -#ifdef _VARIADIC_MAX - #define __TBB_VARIADIC_MAX _VARIADIC_MAX -#else - #if _MSC_VER == 1700 - #define __TBB_VARIADIC_MAX 5 // VS11 setting, issue resolved in VS12 - #elif _MSC_VER == 1600 - #define __TBB_VARIADIC_MAX 10 // VS10 setting - #else - #define __TBB_VARIADIC_MAX 15 - #endif -#endif - -/** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by - the bugs in compilers, standard or OS specific libraries. They should be - removed as soon as the corresponding bugs are fixed or the buggy OS/compiler - versions go out of the support list. -**/ - -// Some STL containers not support allocator traits in old GCC versions -#if __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION <= 50301 - #define TBB_ALLOCATOR_TRAITS_BROKEN 1 -#endif - -// GCC 4.8 C++ standard library implements std::this_thread::yield as no-op. 
-#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 - #define __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN 1 -#endif - -/** End of __TBB_XXX_BROKEN macro section **/ - -#if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER) - // A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702) - #define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1 -#endif - -// Many OS versions (Android 4.0.[0-3] for example) need workaround for dlopen to avoid non-recursive loader lock hang -// Setting the workaround for all compile targets ($APP_PLATFORM) below Android 4.4 (android-19) -#if __ANDROID__ - #include <android/api-level.h> -#endif - -#define __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING (TBB_PREVIEW_FLOW_GRAPH_FEATURES) - -#ifndef __TBB_PREVIEW_CRITICAL_TASKS -#define __TBB_PREVIEW_CRITICAL_TASKS 1 -#endif - -#ifndef __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -#define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES) -#endif - - -#if !defined(__APPLE__) || !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED > 101500 - #define __TBB_ALIGNAS_AVAILABLE 1 -#else - #define __TBB_ALIGNAS_AVAILABLE 0 -#endif - -#endif // __TBB_detail__config_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__config_H +#define __TBB_detail__config_H + +/** This header is supposed to contain macro definitions only. + The macros defined here are intended to control such aspects of TBB build as + - presence of compiler features + - compilation modes + - feature sets + - known compiler/platform issues +**/ + +/* Check which standard library we use. */ +#include <cstddef> + +#if _MSC_VER + #define __TBB_EXPORTED_FUNC __cdecl + #define __TBB_EXPORTED_METHOD __thiscall +#else + #define __TBB_EXPORTED_FUNC + #define __TBB_EXPORTED_METHOD +#endif + +#if defined(_MSVC_LANG) + #define __TBB_LANG _MSVC_LANG +#else + #define __TBB_LANG __cplusplus +#endif // _MSVC_LANG + +#define __TBB_CPP14_PRESENT (__TBB_LANG >= 201402L) +#define __TBB_CPP17_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP20_PRESENT (__TBB_LANG >= 201709L) + +#if __INTEL_COMPILER || _MSC_VER + #define __TBB_NOINLINE(decl) __declspec(noinline) decl +#elif __GNUC__ + #define __TBB_NOINLINE(decl) decl __attribute__ ((noinline)) +#else + #define __TBB_NOINLINE(decl) decl +#endif + +#define __TBB_STRING_AUX(x) #x +#define __TBB_STRING(x) __TBB_STRING_AUX(x) + +// Note that when ICC or Clang is in use, __TBB_GCC_VERSION might not fully match +// the actual GCC version on the system. +#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + +/* Check which standard library we use. */ + +// Prior to GCC 7, GNU libstdc++ did not have a convenient version macro. +// Therefore we use different ways to detect its version. +#ifdef TBB_USE_GLIBCXX_VERSION + // The version is explicitly specified in our public TBB_USE_GLIBCXX_VERSION macro. + // Its format should match the __TBB_GCC_VERSION above, e.g. 
70301 for libstdc++ coming with GCC 7.3.1. + #define __TBB_GLIBCXX_VERSION TBB_USE_GLIBCXX_VERSION +#elif _GLIBCXX_RELEASE && _GLIBCXX_RELEASE != __GNUC__ + // Reported versions of GCC and libstdc++ do not match; trust the latter + #define __TBB_GLIBCXX_VERSION (_GLIBCXX_RELEASE*10000) +#elif __GLIBCPP__ || __GLIBCXX__ + // The version macro is not defined or matches the GCC version; use __TBB_GCC_VERSION + #define __TBB_GLIBCXX_VERSION __TBB_GCC_VERSION +#endif + +#if __clang__ + // according to clang documentation, version can be vendor specific + #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif + +/** Macro helpers **/ + +#define __TBB_CONCAT_AUX(A,B) A##B +// The additional level of indirection is needed to expand macros A and B (not to get the AB macro). +// See [cpp.subst] and [cpp.concat] for more details. +#define __TBB_CONCAT(A,B) __TBB_CONCAT_AUX(A,B) +// The IGNORED argument and comma are needed to always have 2 arguments (even when A is empty). +#define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A) +#define __TBB_MACRO_EMPTY 1 + +#if _M_X64 + #define __TBB_W(name) name##64 +#else + #define __TBB_W(name) name +#endif + +/** User controlled TBB features & modes **/ + +#ifndef TBB_USE_DEBUG + /* + There are four cases that are supported: + 1. "_DEBUG is undefined" means "no debug"; + 2. "_DEBUG defined to something that is evaluated to 0" (including "garbage", as per [cpp.cond]) means "no debug"; + 3. "_DEBUG defined to something that is evaluated to a non-zero value" means "debug"; + 4. "_DEBUG defined to nothing (empty)" means "debug". + */ + #ifdef _DEBUG + // Check if _DEBUG is empty. + #define __TBB_IS__DEBUG_EMPTY (__TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)==__TBB_MACRO_EMPTY) + #if __TBB_IS__DEBUG_EMPTY + #define TBB_USE_DEBUG 1 + #else + #define TBB_USE_DEBUG _DEBUG + #endif // __TBB_IS__DEBUG_EMPTY + #else + #define TBB_USE_DEBUG 0 + #endif // _DEBUG +#endif // TBB_USE_DEBUG + +#ifndef TBB_USE_ASSERT + #define TBB_USE_ASSERT TBB_USE_DEBUG +#endif // TBB_USE_ASSERT + +#ifndef TBB_USE_PROFILING_TOOLS +#if TBB_USE_DEBUG + #define TBB_USE_PROFILING_TOOLS 2 +#else // TBB_USE_DEBUG + #define TBB_USE_PROFILING_TOOLS 0 +#endif // TBB_USE_DEBUG +#endif // TBB_USE_PROFILING_TOOLS + +// Exceptions support cases +#if !(__EXCEPTIONS || defined(_CPPUNWIND) || __SUNPRO_CC) + #if TBB_USE_EXCEPTIONS + #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. 
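The __TBB_IS_MACRO_EMPTY probe above is what lets TBB_USE_DEBUG distinguish "#define _DEBUG" (empty) from "#define _DEBUG 0". A minimal standalone sketch of the same token-pasting trick; the PROBE_* and DBG names are invented for the example and are not part of TBB:

    #include <cstdio>

    // Same idea as __TBB_CONCAT_AUX / __TBB_IS_MACRO_EMPTY above: paste a known
    // prefix directly against the tested macro's value. If the value is empty,
    // the paste yields the prefix token itself, which expands to 1.
    #define PROBE_CAT_AUX(a, b) a##b
    #define PROBE_EMPTY 1
    #define PROBE_IS_EMPTY(m) PROBE_CAT_AUX(PROBE_EMPTY, m)

    #define DBG                 // defined, but to nothing ("debug requested")

    int main() {
    #if PROBE_IS_EMPTY(DBG) == PROBE_EMPTY
        std::puts("DBG is defined to nothing -> treat as enabled");
    #else
        std::puts("DBG carries a value -> use that value");
    #endif
    }

Because PROBE_CAT_AUX applies ## directly, neither operand is pre-expanded, which is exactly why the two-level __TBB_CONCAT helper is bypassed in the emptiness check above.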
+ #elif !defined(TBB_USE_EXCEPTIONS) + #define TBB_USE_EXCEPTIONS 0 + #endif +#elif !defined(TBB_USE_EXCEPTIONS) + #define TBB_USE_EXCEPTIONS 1 +#endif + +/** Preprocessor symbols to determine HW architecture **/ + +#if _WIN32 || _WIN64 + #if defined(_M_X64) || defined(__x86_64__) // the latter for MinGW support + #define __TBB_x86_64 1 + #elif defined(_M_IA64) + #define __TBB_ipf 1 + #elif defined(_M_IX86) || defined(__i386__) // the latter for MinGW support + #define __TBB_x86_32 1 + #else + #define __TBB_generic_arch 1 + #endif +#else /* Assume generic Unix */ + #if __x86_64__ + #define __TBB_x86_64 1 + #elif __ia64__ + #define __TBB_ipf 1 + #elif __i386__||__i386 // __i386 is for Sun OS + #define __TBB_x86_32 1 + #else + #define __TBB_generic_arch 1 + #endif +#endif + +/** Windows API or POSIX API **/ + +#if _WIN32 || _WIN64 + #define __TBB_USE_WINAPI 1 +#else + #define __TBB_USE_POSIX 1 +#endif + +/** Internal TBB features & modes **/ + +/** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/ +#ifndef __TBB_DYNAMIC_LOAD_ENABLED + #define __TBB_DYNAMIC_LOAD_ENABLED 1 +#endif + +/** __TBB_WIN8UI_SUPPORT enables support of Windows* Store Apps and limit a possibility to load + shared libraries at run time only from application container **/ +#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP + #define __TBB_WIN8UI_SUPPORT 1 +#else + #define __TBB_WIN8UI_SUPPORT 0 +#endif + +/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/ +#ifndef __TBB_WEAK_SYMBOLS_PRESENT + #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) +#endif + +/** Presence of compiler features **/ + +#if __clang__ && !__INTEL_COMPILER + #define __TBB_USE_OPTIONAL_RTTI __has_feature(cxx_rtti) +#elif defined(_CPPRTTI) + #define __TBB_USE_OPTIONAL_RTTI 1 +#else + #define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__) +#endif + +/** Library features presence macros **/ + +#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__TBB_LANG >= 201402L) +#define __TBB_CPP17_INVOKE_RESULT_PRESENT (__TBB_LANG >= 201703L) + +// TODO: Remove the condition(__INTEL_COMPILER > 2021) from the __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// macro when this feature start working correctly on this compiler. 
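TBB_USE_EXCEPTIONS, as derived above, defaults to 1 only when the toolchain reports exception support (__EXCEPTIONS, _CPPUNWIND). A hedged sketch of client code keying off the same macro; the function and the abort-on-error policy are the example's own choices, not TBB's:

    #include <oneapi/tbb/parallel_for.h>
    #include <cstddef>
    #include <cstdlib>
    #include <exception>
    #include <vector>

    // Scales every element; the try/catch is only compiled when exception
    // support is configured, mirroring the TBB_USE_EXCEPTIONS logic above.
    void scale_all(std::vector<double>& v, double factor) {
    #if TBB_USE_EXCEPTIONS
        try {
            oneapi::tbb::parallel_for(std::size_t(0), v.size(),
                [&](std::size_t i) { v[i] *= factor; });
        } catch (const std::exception&) {
            std::abort();   // illustrative policy only; real code would report or recover
        }
    #else
        oneapi::tbb::parallel_for(std::size_t(0), v.size(),
            [&](std::size_t i) { v[i] *= factor; });
    #endif
    }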
+#if __INTEL_COMPILER && (!_MSC_VER || __INTEL_CXX11_MOVE__) + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__INTEL_COMPILER > 2021 && __TBB_LANG >= 201703L) + #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition +#elif __clang__ + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__has_feature(cxx_variable_templates)) + #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition + #ifdef __cpp_deduction_guides + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201611L) + #else + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT 0 + #endif +#elif __GNUC__ + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L && __TBB_GCC_VERSION >= 50000) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201606L) + #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 201709L && __TBB_GCC_VERSION >= 100201) +#elif _MSC_VER + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (_MSC_FULL_VER >= 190023918 && (!__INTEL_COMPILER || __INTEL_COMPILER >= 1700)) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (_MSC_VER >= 1914 && __TBB_LANG >= 201703L && (!__INTEL_COMPILER || __INTEL_COMPILER > 2021)) + #define __TBB_CPP20_CONCEPTS_PRESENT (_MSC_VER >= 1923 && __TBB_LANG >= 202002L) // TODO: INTEL_COMPILER? +#else + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__TBB_LANG >= 201703L) + #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 202002L) +#endif + +// GCC4.8 on RHEL7 does not support std::get_new_handler +#define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION) +// GCC4.8 on RHEL7 does not support std::is_trivially_copyable +#define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700 || (__TBB_GLIBCXX_VERSION >= 50000 && __GXX_EXPERIMENTAL_CXX0X__)) + +#define __TBB_CPP17_MEMORY_RESOURCE_PRESENT 0 +#define __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT (_MSC_VER >= 1911) +#define __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_IS_SWAPPABLE_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP20_COMPARISONS_PRESENT __TBB_CPP20_PRESENT + +#if (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__APPLE__ && !defined(_musl_)) +#define __TBB_RESUMABLE_TASKS 1 +#else +#define __TBB_RESUMABLE_TASKS 0 +#endif + +/* This macro marks incomplete code or comments describing ideas which are considered for the future. + * See also for plain comment with TODO and FIXME marks for small improvement opportunities. + */ +#define __TBB_TODO 0 + +/* Check which standard library we use. */ +/* __TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed. 
*/ +#if !defined(__TBB_SYMBOL) && !__TBB_CONFIG_PREPROC_ONLY + #include <cstddef> +#endif + +/** Target OS is either iOS* or iOS* simulator **/ +#if __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ + #define __TBB_IOS 1 +#endif + +#if __APPLE__ + #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000 + // ICC does not correctly set the macro if -mmacosx-min-version is not specified + #define __TBB_MACOS_TARGET_VERSION (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000)) + #else + #define __TBB_MACOS_TARGET_VERSION __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ + #endif +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) + #define __TBB_GCC_WARNING_IGNORED_ATTRIBUTES_PRESENT (__TBB_GCC_VERSION >= 60100) +#endif + +#define __TBB_CPP17_FALLTHROUGH_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_NODISCARD_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_FALLTHROUGH_PRESENT (__TBB_GCC_VERSION >= 70000 && !__INTEL_COMPILER) + +#if __TBB_CPP17_FALLTHROUGH_PRESENT + #define __TBB_fallthrough [[fallthrough]] +#elif __TBB_FALLTHROUGH_PRESENT + #define __TBB_fallthrough __attribute__ ((fallthrough)) +#else + #define __TBB_fallthrough +#endif + +#if __TBB_CPP17_NODISCARD_PRESENT + #define __TBB_nodiscard [[nodiscard]] +#elif __clang__ || __GNUC__ + #define __TBB_nodiscard __attribute__((warn_unused_result)) +#else + #define __TBB_nodiscard +#endif + +#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \ + || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200) && !__TBB_IOS) + + +#define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || (_MSC_VER>=1700 && !__clang__) || __INTEL_COMPILER>=1300) && !__TBB_DEFINE_MIC && !__ANDROID__) + +#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) && !__ANDROID__) + +/** Internal TBB features & modes **/ + +/** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when + it's necessary to test internal functions not exported from TBB DLLs +**/ +#if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY) + #define __TBB_NO_IMPLICIT_LINKAGE 1 + #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 +#endif + +#if (__TBB_BUILD || __TBBMALLOC_BUILD || __TBBMALLOCPROXY_BUILD || __TBBBIND_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE) + #define __TBB_NO_IMPLICIT_LINKAGE 1 +#endif + +#if _MSC_VER + #if !__TBB_NO_IMPLICIT_LINKAGE + #ifdef _DEBUG + #pragma comment(lib, "tbb12_debug.lib") + #else + #pragma comment(lib, "tbb12.lib") + #endif + #endif +#endif + +#ifndef __TBB_SCHEDULER_OBSERVER + #define __TBB_SCHEDULER_OBSERVER 1 +#endif /* __TBB_SCHEDULER_OBSERVER */ + +#ifndef __TBB_FP_CONTEXT + #define __TBB_FP_CONTEXT 1 +#endif /* __TBB_FP_CONTEXT */ + +#define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official + +#ifndef __TBB_ARENA_OBSERVER + #define __TBB_ARENA_OBSERVER __TBB_SCHEDULER_OBSERVER +#endif /* __TBB_ARENA_OBSERVER */ + +#ifndef __TBB_ARENA_BINDING + #define __TBB_ARENA_BINDING 1 +#endif + +#if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD + #define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1 +#endif + +#if (TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION || __TBB_BUILD) && __TBB_ARENA_BINDING + #define __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT 1 +#endif + +#ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY + #define 
__TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 +#endif + +#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \ + (_WIN32 || _WIN64 || __APPLE__ || (__linux__ && !__ANDROID__)) + #define __TBB_SURVIVE_THREAD_SWITCH 1 +#endif /* __TBB_SURVIVE_THREAD_SWITCH */ + +#ifndef TBB_PREVIEW_FLOW_GRAPH_FEATURES + #define TBB_PREVIEW_FLOW_GRAPH_FEATURES __TBB_CPF_BUILD +#endif + +#ifndef __TBB_DEFAULT_PARTITIONER + #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner +#endif + +#ifndef __TBB_FLOW_TRACE_CODEPTR + #define __TBB_FLOW_TRACE_CODEPTR __TBB_CPF_BUILD +#endif + +// Intel(R) C++ Compiler starts analyzing usages of the deprecated content at the template +// instantiation site, which is too late for suppression of the corresponding messages for internal +// stuff. +#if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) + #if (__TBB_LANG >= 201402L) + #define __TBB_DEPRECATED [[deprecated]] + #define __TBB_DEPRECATED_MSG(msg) [[deprecated(msg)]] + #elif _MSC_VER + #define __TBB_DEPRECATED __declspec(deprecated) + #define __TBB_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) + #elif (__GNUC__ && __TBB_GCC_VERSION >= 40805) || __clang__ + #define __TBB_DEPRECATED __attribute__((deprecated)) + #define __TBB_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) + #endif +#endif // !defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + +#if !defined(__TBB_DEPRECATED) + #define __TBB_DEPRECATED + #define __TBB_DEPRECATED_MSG(msg) +#elif !defined(__TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES) + // Suppress deprecated messages from self + #define __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES 1 +#endif + +#if defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) && (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + #define __TBB_DEPRECATED_VERBOSE __TBB_DEPRECATED + #define __TBB_DEPRECATED_VERBOSE_MSG(msg) __TBB_DEPRECATED_MSG(msg) +#else + #define __TBB_DEPRECATED_VERBOSE + #define __TBB_DEPRECATED_VERBOSE_MSG(msg) +#endif // (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + +#if (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) && !(__TBB_LANG >= 201103L || _MSC_VER >= 1900) + #pragma message("TBB Warning: Support for C++98/03 is deprecated. Please use the compiler that supports C++11 features at least.") +#endif + +#ifdef _VARIADIC_MAX + #define __TBB_VARIADIC_MAX _VARIADIC_MAX +#else + #if _MSC_VER == 1700 + #define __TBB_VARIADIC_MAX 5 // VS11 setting, issue resolved in VS12 + #elif _MSC_VER == 1600 + #define __TBB_VARIADIC_MAX 10 // VS10 setting + #else + #define __TBB_VARIADIC_MAX 15 + #endif +#endif + +/** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by + the bugs in compilers, standard or OS specific libraries. They should be + removed as soon as the corresponding bugs are fixed or the buggy OS/compiler + versions go out of the support list. +**/ + +// Some STL containers not support allocator traits in old GCC versions +#if __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION <= 50301 + #define TBB_ALLOCATOR_TRAITS_BROKEN 1 +#endif + +// GCC 4.8 C++ standard library implements std::this_thread::yield as no-op. 
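The __TBB_DEPRECATED* family above resolves to whichever deprecation spelling the compiler accepts, so TBB headers can flag soon-to-be-removed entry points without per-compiler clutter. A compact sketch of the same dispatch with stand-in names (MY_DEPRECATED_MSG, old_api), invented for the example:

    #include <cstdio>

    // Pick the deprecation attribute spelling the toolchain understands.
    #if __cplusplus >= 201402L
        #define MY_DEPRECATED_MSG(msg) [[deprecated(msg)]]
    #elif defined(_MSC_VER)
        #define MY_DEPRECATED_MSG(msg) __declspec(deprecated(msg))
    #elif defined(__GNUC__) || defined(__clang__)
        #define MY_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
    #else
        #define MY_DEPRECATED_MSG(msg)
    #endif

    MY_DEPRECATED_MSG("use new_api() instead")
    void old_api() { std::puts("old_api"); }

    void new_api() { std::puts("new_api"); }

    int main() {
        old_api();   // typically emits a -Wdeprecated-declarations style warning
        new_api();
    }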
+#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 + #define __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN 1 +#endif + +/** End of __TBB_XXX_BROKEN macro section **/ + +#if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER) + // A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702) + #define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1 +#endif + +// Many OS versions (Android 4.0.[0-3] for example) need workaround for dlopen to avoid non-recursive loader lock hang +// Setting the workaround for all compile targets ($APP_PLATFORM) below Android 4.4 (android-19) +#if __ANDROID__ + #include <android/api-level.h> +#endif + +#define __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING (TBB_PREVIEW_FLOW_GRAPH_FEATURES) + +#ifndef __TBB_PREVIEW_CRITICAL_TASKS +#define __TBB_PREVIEW_CRITICAL_TASKS 1 +#endif + +#ifndef __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +#define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES) +#endif + + +#if !defined(__APPLE__) || !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED > 101500 + #define __TBB_ALIGNAS_AVAILABLE 1 +#else + #define __TBB_ALIGNAS_AVAILABLE 0 +#endif + +#endif // __TBB_detail__config_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h index 4dca07fa10..89ac137451 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h @@ -1,67 +1,67 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_detail__containers_helpers_H -#define __TBB_detail__containers_helpers_H - -#include "_template_helpers.h" -#include "_allocator_traits.h" -#include <type_traits> -#include <memory> -#include <functional> - -namespace tbb { -namespace detail { -inline namespace d0 { - -template <typename Compare, typename = void> -struct comp_is_transparent : std::false_type {}; - -template <typename Compare> -struct comp_is_transparent<Compare, tbb::detail::void_t<typename Compare::is_transparent>> : std::true_type {}; - -template <typename Key, typename Hasher, typename KeyEqual, typename = void > -struct has_transparent_key_equal : std::false_type { using type = KeyEqual; }; - -template <typename Key, typename Hasher, typename KeyEqual> -struct has_transparent_key_equal<Key, Hasher, KeyEqual, tbb::detail::void_t<typename Hasher::transparent_key_equal>> : std::true_type { - using type = typename Hasher::transparent_key_equal; - static_assert(comp_is_transparent<type>::value, "Hash::transparent_key_equal::is_transparent is not valid or does not denote a type."); - static_assert((std::is_same<KeyEqual, std::equal_to<Key>>::value || - std::is_same<typename Hasher::transparent_key_equal, KeyEqual>::value), "KeyEqual is a different type than equal_to<Key> or Hash::transparent_key_equal."); - }; - -struct is_iterator_impl { -template <typename T> -using iter_traits_category = typename std::iterator_traits<T>::iterator_category; - -template <typename T> -using input_iter_category = typename std::enable_if<std::is_base_of<std::input_iterator_tag, iter_traits_category<T>>::value>::type; -}; // struct is_iterator_impl - -template <typename T> -using is_input_iterator = supports<T, is_iterator_impl::iter_traits_category, is_iterator_impl::input_iter_category>; - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename T> -inline constexpr bool is_input_iterator_v = is_input_iterator<T>::value; -#endif - -} // inline namespace d0 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__containers_helpers_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__containers_helpers_H +#define __TBB_detail__containers_helpers_H + +#include "_template_helpers.h" +#include "_allocator_traits.h" +#include <type_traits> +#include <memory> +#include <functional> + +namespace tbb { +namespace detail { +inline namespace d0 { + +template <typename Compare, typename = void> +struct comp_is_transparent : std::false_type {}; + +template <typename Compare> +struct comp_is_transparent<Compare, tbb::detail::void_t<typename Compare::is_transparent>> : std::true_type {}; + +template <typename Key, typename Hasher, typename KeyEqual, typename = void > +struct has_transparent_key_equal : std::false_type { using type = KeyEqual; }; + +template <typename Key, typename Hasher, typename KeyEqual> +struct has_transparent_key_equal<Key, Hasher, KeyEqual, tbb::detail::void_t<typename Hasher::transparent_key_equal>> : std::true_type { + using type = typename Hasher::transparent_key_equal; + static_assert(comp_is_transparent<type>::value, "Hash::transparent_key_equal::is_transparent is not valid or does not denote a type."); + static_assert((std::is_same<KeyEqual, std::equal_to<Key>>::value || + std::is_same<typename Hasher::transparent_key_equal, KeyEqual>::value), "KeyEqual is a different type than equal_to<Key> or Hash::transparent_key_equal."); + }; + +struct is_iterator_impl { +template <typename T> +using iter_traits_category = typename std::iterator_traits<T>::iterator_category; + +template <typename T> +using input_iter_category = typename std::enable_if<std::is_base_of<std::input_iterator_tag, iter_traits_category<T>>::value>::type; +}; // struct is_iterator_impl + +template <typename T> +using is_input_iterator = supports<T, is_iterator_impl::iter_traits_category, is_iterator_impl::input_iter_category>; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename T> +inline constexpr bool is_input_iterator_v = is_input_iterator<T>::value; +#endif + +} // inline namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__containers_helpers_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h index 9764209fa8..05596ce797 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h @@ -1,88 +1,88 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
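comp_is_transparent in the header above is the usual is_transparent detection idiom; it is what gates heterogeneous-lookup overloads in the concurrent ordered containers. A self-contained restatement, with locally renamed helpers (trailing underscore) so they do not collide with or claim to be TBB's own:

    #include <functional>
    #include <string>
    #include <type_traits>

    // Local re-statement of the detection idiom shown above.
    template <typename...> using void_t_ = void;

    template <typename Compare, typename = void>
    struct comp_is_transparent_ : std::false_type {};

    template <typename Compare>
    struct comp_is_transparent_<Compare, void_t_<typename Compare::is_transparent>>
        : std::true_type {};

    // A comparator opts in to heterogeneous lookup by exposing is_transparent.
    struct transparent_less {
        using is_transparent = void;
        template <typename A, typename B>
        bool operator()(const A& a, const B& b) const { return a < b; }
    };

    static_assert(comp_is_transparent_<transparent_less>::value,
                  "opt-in comparator is detected");
    static_assert(!comp_is_transparent_<std::less<std::string>>::value,
                  "plain comparator is not");

    int main() { return 0; }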
-*/ - -#ifndef __TBB__exception_H -#define __TBB__exception_H - -#include "_config.h" - -#include <new> // std::bad_alloc -#include <exception> // std::exception -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -#include <stdexcept> // std::runtime_error -#endif - -namespace tbb { -namespace detail { -inline namespace d0 { -enum class exception_id { - bad_alloc = 1, - bad_last_alloc, - user_abort, - nonpositive_step, - out_of_range, - reservation_length_error, - missing_wait, - invalid_load_factor, - invalid_key, - bad_tagged_msg_cast, - unsafe_wait, - last_entry -}; -} // namespace d0 - -namespace r1 { -//! Exception for concurrent containers -class bad_last_alloc : public std::bad_alloc { -public: - const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; -}; - -//! Exception for user-initiated abort -class user_abort : public std::exception { -public: - const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; -}; - -//! Exception for missing wait on structured_task_group -class missing_wait : public std::exception { -public: - const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; -}; - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -//! Exception for impossible finalization of task_sheduler_handle -class unsafe_wait : public std::runtime_error { -public: - unsafe_wait(const char* msg) : std::runtime_error(msg) {} -}; -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -//! Gathers all throw operators in one place. -/** Its purpose is to minimize code bloat that can be caused by throw operators - scattered in multiple places, especially in templates. **/ -void __TBB_EXPORTED_FUNC throw_exception ( exception_id ); -} // namespace r1 - -inline namespace d0 { -using r1::throw_exception; -} // namespace d0 - -} // namespace detail -} // namespace tbb - -#endif // __TBB__exception_H - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__exception_H +#define __TBB__exception_H + +#include "_config.h" + +#include <new> // std::bad_alloc +#include <exception> // std::exception +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +#include <stdexcept> // std::runtime_error +#endif + +namespace tbb { +namespace detail { +inline namespace d0 { +enum class exception_id { + bad_alloc = 1, + bad_last_alloc, + user_abort, + nonpositive_step, + out_of_range, + reservation_length_error, + missing_wait, + invalid_load_factor, + invalid_key, + bad_tagged_msg_cast, + unsafe_wait, + last_entry +}; +} // namespace d0 + +namespace r1 { +//! Exception for concurrent containers +class bad_last_alloc : public std::bad_alloc { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +//! Exception for user-initiated abort +class user_abort : public std::exception { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +//! 
Exception for missing wait on structured_task_group +class missing_wait : public std::exception { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +//! Exception for impossible finalization of task_sheduler_handle +class unsafe_wait : public std::runtime_error { +public: + unsafe_wait(const char* msg) : std::runtime_error(msg) {} +}; +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +//! Gathers all throw operators in one place. +/** Its purpose is to minimize code bloat that can be caused by throw operators + scattered in multiple places, especially in templates. **/ +void __TBB_EXPORTED_FUNC throw_exception ( exception_id ); +} // namespace r1 + +inline namespace d0 { +using r1::throw_exception; +} // namespace d0 + +} // namespace detail +} // namespace tbb + +#endif // __TBB__exception_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h index 34ba1efcaf..3810e0392a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h @@ -1,371 +1,371 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_body_impl_H -#define __TBB__flow_graph_body_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::detail::d1 (in flow_graph.h) - -typedef std::uint64_t tag_value; - - -// TODO revamp: find out if there is already helper for has_policy. -template<typename ... Policies> struct Policy {}; - -template<typename ... Policies> struct has_policy; - -template<typename ExpectedPolicy, typename FirstPolicy, typename ...Policies> -struct has_policy<ExpectedPolicy, FirstPolicy, Policies...> : - std::integral_constant<bool, has_policy<ExpectedPolicy, FirstPolicy>::value || - has_policy<ExpectedPolicy, Policies...>::value> {}; - -template<typename ExpectedPolicy, typename SinglePolicy> -struct has_policy<ExpectedPolicy, SinglePolicy> : - std::integral_constant<bool, std::is_same<ExpectedPolicy, SinglePolicy>::value> {}; - -template<typename ExpectedPolicy, typename ...Policies> -struct has_policy<ExpectedPolicy, Policy<Policies...> > : has_policy<ExpectedPolicy, Policies...> {}; - -namespace graph_policy_namespace { - - struct rejecting { }; - struct reserving { }; - struct queueing { }; - struct lightweight { }; - - // K == type of field used for key-matching. Each tag-matching port will be provided - // functor that, given an object accepted by the port, will return the - /// field of type K being used for matching. 
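The key_matching policy described in the comment above is what the public flow-graph join_node accepts for key-based joining: each port is given a functor extracting the key of type K. A hedged usage sketch assuming the documented oneTBB join_node/key_matching API; the order/payment record types and key extractors are invented for the example:

    #include <oneapi/tbb/flow_graph.h>
    #include <iostream>
    #include <string>
    #include <tuple>

    struct order   { int id; std::string item; };
    struct payment { int id; double amount; };

    int main() {
        using namespace oneapi::tbb::flow;
        graph g;

        // One key-extraction functor per input port, as described above.
        join_node<std::tuple<order, payment>, key_matching<int>> join(
            g,
            [](const order& o)   { return o.id; },
            [](const payment& p) { return p.id; });

        function_node<std::tuple<order, payment>> sink(g, unlimited,
            [](const std::tuple<order, payment>& t) {
                std::cout << std::get<0>(t).item << " paid: "
                          << std::get<1>(t).amount << "\n";
            });

        make_edge(join, sink);
        input_port<0>(join).try_put(order{7, "book"});
        input_port<1>(join).try_put(payment{7, 12.5});
        g.wait_for_all();
    }

The joined tuple is emitted only once both ports have received a message with the same key, which is the behavior the key_matching machinery below implements.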
- template<typename K, typename KHash=tbb_hash_compare<typename std::decay<K>::type > > - struct key_matching { - typedef K key_type; - typedef typename std::decay<K>::type base_key_type; - typedef KHash hash_compare_type; - }; - - // old tag_matching join's new specifier - typedef key_matching<tag_value> tag_matching; - - // Aliases for Policy combinations - typedef Policy<queueing, lightweight> queueing_lightweight; - typedef Policy<rejecting, lightweight> rejecting_lightweight; - -} // namespace graph_policy_namespace - -// -------------- function_body containers ---------------------- - -//! A functor that takes no input and generates a value of type Output -template< typename Output > -class input_body : no_assign { -public: - virtual ~input_body() {} - virtual Output operator()(flow_control& fc) = 0; - virtual input_body* clone() = 0; -}; - -//! The leaf for input_body -template< typename Output, typename Body> -class input_body_leaf : public input_body<Output> { -public: - input_body_leaf( const Body &_body ) : body(_body) { } - Output operator()(flow_control& fc) override { return body(fc); } - input_body_leaf* clone() override { - return new input_body_leaf< Output, Body >(body); - } - Body get_body() { return body; } -private: - Body body; -}; - -//! A functor that takes an Input and generates an Output -template< typename Input, typename Output > -class function_body : no_assign { -public: - virtual ~function_body() {} - virtual Output operator()(const Input &input) = 0; - virtual function_body* clone() = 0; -}; - -//! the leaf for function_body -template <typename Input, typename Output, typename B> -class function_body_leaf : public function_body< Input, Output > { -public: - function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const Input &i) override { return body(i); } - B get_body() { return body; } - function_body_leaf* clone() override { - return new function_body_leaf< Input, Output, B >(body); - } -private: - B body; -}; - -//! the leaf for function_body specialized for Input and output of continue_msg -template <typename B> -class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > { -public: - function_body_leaf( const B &_body ) : body(_body) { } - continue_msg operator()( const continue_msg &i ) override { - body(i); - return i; - } - B get_body() { return body; } - function_body_leaf* clone() override { - return new function_body_leaf< continue_msg, continue_msg, B >(body); - } -private: - B body; -}; - -//! the leaf for function_body specialized for Output of continue_msg -template <typename Input, typename B> -class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > { -public: - function_body_leaf( const B &_body ) : body(_body) { } - continue_msg operator()(const Input &i) override { - body(i); - return continue_msg(); - } - B get_body() { return body; } - function_body_leaf* clone() override { - return new function_body_leaf< Input, continue_msg, B >(body); - } -private: - B body; -}; - -//! 
the leaf for function_body specialized for Input of continue_msg -template <typename Output, typename B> -class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > { -public: - function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const continue_msg &i) override { - return body(i); - } - B get_body() { return body; } - function_body_leaf* clone() override { - return new function_body_leaf< continue_msg, Output, B >(body); - } -private: - B body; -}; - -//! function_body that takes an Input and a set of output ports -template<typename Input, typename OutputSet> -class multifunction_body : no_assign { -public: - virtual ~multifunction_body () {} - virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0; - virtual multifunction_body* clone() = 0; - virtual void* get_body_ptr() = 0; -}; - -//! leaf for multifunction. OutputSet can be a std::tuple or a vector. -template<typename Input, typename OutputSet, typename B > -class multifunction_body_leaf : public multifunction_body<Input, OutputSet> { -public: - multifunction_body_leaf(const B &_body) : body(_body) { } - void operator()(const Input &input, OutputSet &oset) override { - body(input, oset); // body may explicitly put() to one or more of oset. - } - void* get_body_ptr() override { return &body; } - multifunction_body_leaf* clone() override { - return new multifunction_body_leaf<Input, OutputSet,B>(body); - } - -private: - B body; -}; - -// ------ function bodies for hash_buffers and key-matching joins. - -template<typename Input, typename Output> -class type_to_key_function_body : no_assign { - public: - virtual ~type_to_key_function_body() {} - virtual Output operator()(const Input &input) = 0; // returns an Output - virtual type_to_key_function_body* clone() = 0; -}; - -// specialization for ref output -template<typename Input, typename Output> -class type_to_key_function_body<Input,Output&> : no_assign { - public: - virtual ~type_to_key_function_body() {} - virtual const Output & operator()(const Input &input) = 0; // returns a const Output& - virtual type_to_key_function_body* clone() = 0; -}; - -template <typename Input, typename Output, typename B> -class type_to_key_function_body_leaf : public type_to_key_function_body<Input, Output> { -public: - type_to_key_function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const Input &i) override { return body(i); } - type_to_key_function_body_leaf* clone() override { - return new type_to_key_function_body_leaf< Input, Output, B>(body); - } -private: - B body; -}; - -template <typename Input, typename Output, typename B> -class type_to_key_function_body_leaf<Input,Output&,B> : public type_to_key_function_body< Input, Output&> { -public: - type_to_key_function_body_leaf( const B &_body ) : body(_body) { } - const Output& operator()(const Input &i) override { - return body(i); - } - type_to_key_function_body_leaf* clone() override { - return new type_to_key_function_body_leaf< Input, Output&, B>(body); - } -private: - B body; -}; - -// --------------------------- end of function_body containers ------------------------ - -// --------------------------- node task bodies --------------------------------------- - -//! 
A task that calls a node's forward_task function -template< typename NodeType > -class forward_task_bypass : public graph_task { - NodeType &my_node; -public: - forward_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n - , node_priority_t node_priority = no_priority - ) : graph_task(g, allocator, node_priority), - my_node(n) {} - - task* execute(execution_data& ed) override { - graph_task* next_task = my_node.forward_task(); - if (SUCCESSFULLY_ENQUEUED == next_task) - next_task = nullptr; - else if (next_task) - next_task = prioritize_task(my_node.graph_reference(), *next_task); - finalize(ed); - return next_task; - } -}; - -//! A task that calls a node's apply_body_bypass function, passing in an input of type Input -// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL -template< typename NodeType, typename Input > -class apply_body_task_bypass : public graph_task { - NodeType &my_node; - Input my_input; -public: - - apply_body_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n, const Input &i - , node_priority_t node_priority = no_priority - ) : graph_task(g, allocator, node_priority), - my_node(n), my_input(i) {} - - task* execute(execution_data& ed) override { - graph_task* next_task = my_node.apply_body_bypass( my_input ); - if (SUCCESSFULLY_ENQUEUED == next_task) - next_task = nullptr; - else if (next_task) - next_task = prioritize_task(my_node.graph_reference(), *next_task); - finalize(ed); - return next_task; - - } -}; - -//! A task that calls a node's apply_body_bypass function with no input -template< typename NodeType > -class input_node_task_bypass : public graph_task { - NodeType &my_node; -public: - input_node_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n ) - : graph_task(g, allocator), my_node(n) {} - - task* execute(execution_data& ed) override { - graph_task* next_task = my_node.apply_body_bypass( ); - if (SUCCESSFULLY_ENQUEUED == next_task) - next_task = nullptr; - else if (next_task) - next_task = prioritize_task(my_node.graph_reference(), *next_task); - finalize(ed); - return next_task; - } - -}; - -// ------------------------ end of node task bodies ----------------------------------- - -template<typename T, typename DecrementType, typename DummyType = void> -class threshold_regulator; - -template<typename T, typename DecrementType> -class threshold_regulator<T, DecrementType, - typename std::enable_if<std::is_integral<DecrementType>::value>::type> - : public receiver<DecrementType>, no_copy -{ - T* my_node; -protected: - - graph_task* try_put_task( const DecrementType& value ) override { - graph_task* result = my_node->decrement_counter( value ); - if( !result ) - result = SUCCESSFULLY_ENQUEUED; - return result; - } - - graph& graph_reference() const override { - return my_node->my_graph; - } - - template<typename U, typename V> friend class limiter_node; - void reset_receiver( reset_flags ) {} - -public: - threshold_regulator(T* owner) : my_node(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } -}; - -template<typename T> -class threshold_regulator<T, continue_msg, void> : public continue_receiver, no_copy { - - T *my_node; - - graph_task* execute() override { - return my_node->decrement_counter( 1 ); - } - -protected: - - graph& graph_reference() const override { - return my_node->my_graph; - } - -public: - - typedef continue_msg input_type; - typedef continue_msg output_type; - threshold_regulator(T* owner) - : 
continue_receiver( /*number_of_predecessors=*/0, no_priority ), my_node(owner) - { - // Do not work with the passed pointer here as it may not be fully initialized yet - } -}; - -#endif // __TBB__flow_graph_body_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_body_impl_H +#define __TBB__flow_graph_body_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 (in flow_graph.h) + +typedef std::uint64_t tag_value; + + +// TODO revamp: find out if there is already helper for has_policy. +template<typename ... Policies> struct Policy {}; + +template<typename ... Policies> struct has_policy; + +template<typename ExpectedPolicy, typename FirstPolicy, typename ...Policies> +struct has_policy<ExpectedPolicy, FirstPolicy, Policies...> : + std::integral_constant<bool, has_policy<ExpectedPolicy, FirstPolicy>::value || + has_policy<ExpectedPolicy, Policies...>::value> {}; + +template<typename ExpectedPolicy, typename SinglePolicy> +struct has_policy<ExpectedPolicy, SinglePolicy> : + std::integral_constant<bool, std::is_same<ExpectedPolicy, SinglePolicy>::value> {}; + +template<typename ExpectedPolicy, typename ...Policies> +struct has_policy<ExpectedPolicy, Policy<Policies...> > : has_policy<ExpectedPolicy, Policies...> {}; + +namespace graph_policy_namespace { + + struct rejecting { }; + struct reserving { }; + struct queueing { }; + struct lightweight { }; + + // K == type of field used for key-matching. Each tag-matching port will be provided + // functor that, given an object accepted by the port, will return the + /// field of type K being used for matching. + template<typename K, typename KHash=tbb_hash_compare<typename std::decay<K>::type > > + struct key_matching { + typedef K key_type; + typedef typename std::decay<K>::type base_key_type; + typedef KHash hash_compare_type; + }; + + // old tag_matching join's new specifier + typedef key_matching<tag_value> tag_matching; + + // Aliases for Policy combinations + typedef Policy<queueing, lightweight> queueing_lightweight; + typedef Policy<rejecting, lightweight> rejecting_lightweight; + +} // namespace graph_policy_namespace + +// -------------- function_body containers ---------------------- + +//! A functor that takes no input and generates a value of type Output +template< typename Output > +class input_body : no_assign { +public: + virtual ~input_body() {} + virtual Output operator()(flow_control& fc) = 0; + virtual input_body* clone() = 0; +}; + +//! 
The leaf for input_body +template< typename Output, typename Body> +class input_body_leaf : public input_body<Output> { +public: + input_body_leaf( const Body &_body ) : body(_body) { } + Output operator()(flow_control& fc) override { return body(fc); } + input_body_leaf* clone() override { + return new input_body_leaf< Output, Body >(body); + } + Body get_body() { return body; } +private: + Body body; +}; + +//! A functor that takes an Input and generates an Output +template< typename Input, typename Output > +class function_body : no_assign { +public: + virtual ~function_body() {} + virtual Output operator()(const Input &input) = 0; + virtual function_body* clone() = 0; +}; + +//! the leaf for function_body +template <typename Input, typename Output, typename B> +class function_body_leaf : public function_body< Input, Output > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const Input &i) override { return body(i); } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< Input, Output, B >(body); + } +private: + B body; +}; + +//! the leaf for function_body specialized for Input and output of continue_msg +template <typename B> +class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + continue_msg operator()( const continue_msg &i ) override { + body(i); + return i; + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< continue_msg, continue_msg, B >(body); + } +private: + B body; +}; + +//! the leaf for function_body specialized for Output of continue_msg +template <typename Input, typename B> +class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + continue_msg operator()(const Input &i) override { + body(i); + return continue_msg(); + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< Input, continue_msg, B >(body); + } +private: + B body; +}; + +//! the leaf for function_body specialized for Input of continue_msg +template <typename Output, typename B> +class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const continue_msg &i) override { + return body(i); + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< continue_msg, Output, B >(body); + } +private: + B body; +}; + +//! function_body that takes an Input and a set of output ports +template<typename Input, typename OutputSet> +class multifunction_body : no_assign { +public: + virtual ~multifunction_body () {} + virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0; + virtual multifunction_body* clone() = 0; + virtual void* get_body_ptr() = 0; +}; + +//! leaf for multifunction. OutputSet can be a std::tuple or a vector. +template<typename Input, typename OutputSet, typename B > +class multifunction_body_leaf : public multifunction_body<Input, OutputSet> { +public: + multifunction_body_leaf(const B &_body) : body(_body) { } + void operator()(const Input &input, OutputSet &oset) override { + body(input, oset); // body may explicitly put() to one or more of oset. 
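multifunction_body above is the type-erased holder behind the public multifunction_node, whose body receives the input together with a tuple of output ports and may put to any subset of them. A usage sketch assuming the documented oneTBB multifunction_node API; the even/odd splitter is invented for the example:

    #include <oneapi/tbb/flow_graph.h>
    #include <iostream>
    #include <tuple>

    int main() {
        using namespace oneapi::tbb::flow;
        graph g;

        // Output port 0 carries even numbers, port 1 carries odd numbers.
        using splitter_t = multifunction_node<int, std::tuple<int, int>>;
        splitter_t splitter(g, unlimited,
            [](const int& v, splitter_t::output_ports_type& ports) {
                if (v % 2 == 0)
                    std::get<0>(ports).try_put(v);   // body explicitly puts to one port
                else
                    std::get<1>(ports).try_put(v);
            });

        function_node<int> evens(g, serial, [](int v) { std::cout << "even " << v << "\n"; });
        function_node<int> odds (g, serial, [](int v) { std::cout << "odd "  << v << "\n"; });

        make_edge(output_port<0>(splitter), evens);
        make_edge(output_port<1>(splitter), odds);

        for (int i = 0; i < 4; ++i)
            splitter.try_put(i);
        g.wait_for_all();
    }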
+ } + void* get_body_ptr() override { return &body; } + multifunction_body_leaf* clone() override { + return new multifunction_body_leaf<Input, OutputSet,B>(body); + } + +private: + B body; +}; + +// ------ function bodies for hash_buffers and key-matching joins. + +template<typename Input, typename Output> +class type_to_key_function_body : no_assign { + public: + virtual ~type_to_key_function_body() {} + virtual Output operator()(const Input &input) = 0; // returns an Output + virtual type_to_key_function_body* clone() = 0; +}; + +// specialization for ref output +template<typename Input, typename Output> +class type_to_key_function_body<Input,Output&> : no_assign { + public: + virtual ~type_to_key_function_body() {} + virtual const Output & operator()(const Input &input) = 0; // returns a const Output& + virtual type_to_key_function_body* clone() = 0; +}; + +template <typename Input, typename Output, typename B> +class type_to_key_function_body_leaf : public type_to_key_function_body<Input, Output> { +public: + type_to_key_function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const Input &i) override { return body(i); } + type_to_key_function_body_leaf* clone() override { + return new type_to_key_function_body_leaf< Input, Output, B>(body); + } +private: + B body; +}; + +template <typename Input, typename Output, typename B> +class type_to_key_function_body_leaf<Input,Output&,B> : public type_to_key_function_body< Input, Output&> { +public: + type_to_key_function_body_leaf( const B &_body ) : body(_body) { } + const Output& operator()(const Input &i) override { + return body(i); + } + type_to_key_function_body_leaf* clone() override { + return new type_to_key_function_body_leaf< Input, Output&, B>(body); + } +private: + B body; +}; + +// --------------------------- end of function_body containers ------------------------ + +// --------------------------- node task bodies --------------------------------------- + +//! A task that calls a node's forward_task function +template< typename NodeType > +class forward_task_bypass : public graph_task { + NodeType &my_node; +public: + forward_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n + , node_priority_t node_priority = no_priority + ) : graph_task(g, allocator, node_priority), + my_node(n) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.forward_task(); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + } +}; + +//! A task that calls a node's apply_body_bypass function, passing in an input of type Input +// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL +template< typename NodeType, typename Input > +class apply_body_task_bypass : public graph_task { + NodeType &my_node; + Input my_input; +public: + + apply_body_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n, const Input &i + , node_priority_t node_priority = no_priority + ) : graph_task(g, allocator, node_priority), + my_node(n), my_input(i) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.apply_body_bypass( my_input ); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + + } +}; + +//! 
A task that calls a node's apply_body_bypass function with no input +template< typename NodeType > +class input_node_task_bypass : public graph_task { + NodeType &my_node; +public: + input_node_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n ) + : graph_task(g, allocator), my_node(n) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.apply_body_bypass( ); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + } + +}; + +// ------------------------ end of node task bodies ----------------------------------- + +template<typename T, typename DecrementType, typename DummyType = void> +class threshold_regulator; + +template<typename T, typename DecrementType> +class threshold_regulator<T, DecrementType, + typename std::enable_if<std::is_integral<DecrementType>::value>::type> + : public receiver<DecrementType>, no_copy +{ + T* my_node; +protected: + + graph_task* try_put_task( const DecrementType& value ) override { + graph_task* result = my_node->decrement_counter( value ); + if( !result ) + result = SUCCESSFULLY_ENQUEUED; + return result; + } + + graph& graph_reference() const override { + return my_node->my_graph; + } + + template<typename U, typename V> friend class limiter_node; + void reset_receiver( reset_flags ) {} + +public: + threshold_regulator(T* owner) : my_node(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } +}; + +template<typename T> +class threshold_regulator<T, continue_msg, void> : public continue_receiver, no_copy { + + T *my_node; + + graph_task* execute() override { + return my_node->decrement_counter( 1 ); + } + +protected: + + graph& graph_reference() const override { + return my_node->my_graph; + } + +public: + + typedef continue_msg input_type; + typedef continue_msg output_type; + threshold_regulator(T* owner) + : continue_receiver( /*number_of_predecessors=*/0, no_priority ), my_node(owner) + { + // Do not work with the passed pointer here as it may not be fully initialized yet + } +}; + +#endif // __TBB__flow_graph_body_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h index ac5564598b..22d31cdcbb 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h @@ -1,435 +1,435 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_cache_impl_H -#define __TBB__flow_graph_cache_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::detail::d1 (in flow_graph.h) - -//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock. 
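threshold_regulator above backs the decrement port of the public limiter_node: each message arriving at the decrementer lowers the node's internal counter so another item may pass. A hedged sketch assuming the documented oneTBB limiter_node::decrementer() API; the pipeline shown is invented for the example:

    #include <oneapi/tbb/flow_graph.h>
    #include <iostream>

    int main() {
        using namespace oneapi::tbb::flow;
        graph g;

        buffer_node<int> buffered_input(g);
        limiter_node<int> limit(g, 2);      // at most 2 items past this point at a time

        function_node<int, continue_msg> work(g, serial, [](int v) {
            std::cout << "processing " << v << "\n";
            return continue_msg();
        });

        make_edge(buffered_input, limit);
        make_edge(limit, work);
        // Each completion message flowing into the decrementer lowers the internal
        // counter (the threshold_regulator shown above), letting another item through.
        make_edge(work, limit.decrementer());

        for (int i = 0; i < 10; ++i)
            buffered_input.try_put(i);
        g.wait_for_all();
    }

The buffer_node upstream matters: when the limiter is at its threshold it rejects new items, and the buffer retains them until a decrement lets the limiter pull again.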
-template< typename T, typename M=spin_mutex > -class node_cache { - public: - - typedef size_t size_type; - - bool empty() { - typename mutex_type::scoped_lock lock( my_mutex ); - return internal_empty(); - } - - void add( T &n ) { - typename mutex_type::scoped_lock lock( my_mutex ); - internal_push(n); - } - - void remove( T &n ) { - typename mutex_type::scoped_lock lock( my_mutex ); - for ( size_t i = internal_size(); i != 0; --i ) { - T &s = internal_pop(); - if ( &s == &n ) - break; // only remove one predecessor per request - internal_push(s); - } - } - - void clear() { - while( !my_q.empty()) (void)my_q.pop(); - } - -protected: - - typedef M mutex_type; - mutex_type my_mutex; - std::queue< T * > my_q; - - // Assumes lock is held - inline bool internal_empty( ) { - return my_q.empty(); - } - - // Assumes lock is held - inline size_type internal_size( ) { - return my_q.size(); - } - - // Assumes lock is held - inline void internal_push( T &n ) { - my_q.push(&n); - } - - // Assumes lock is held - inline T &internal_pop() { - T *v = my_q.front(); - my_q.pop(); - return *v; - } - -}; - -//! A cache of predecessors that only supports try_get -template< typename T, typename M=spin_mutex > -class predecessor_cache : public node_cache< sender<T>, M > { -public: - typedef M mutex_type; - typedef T output_type; - typedef sender<output_type> predecessor_type; - typedef receiver<output_type> successor_type; - - predecessor_cache( successor_type* owner ) : my_owner( owner ) { - __TBB_ASSERT( my_owner, "predecessor_cache should have an owner." ); - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - bool get_item( output_type& v ) { - - bool msg = false; - - do { - predecessor_type *src; - { - typename mutex_type::scoped_lock lock(this->my_mutex); - if ( this->internal_empty() ) { - break; - } - src = &this->internal_pop(); - } - - // Try to get from this sender - msg = src->try_get( v ); - - if (msg == false) { - // Relinquish ownership of the edge - register_successor(*src, *my_owner); - } else { - // Retain ownership of the edge - this->add(*src); - } - } while ( msg == false ); - return msg; - } - - // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). - void reset() { - for(;;) { - predecessor_type *src; - { - if (this->internal_empty()) break; - src = &this->internal_pop(); - } - register_successor(*src, *my_owner); - } - } - -protected: - successor_type* my_owner; -}; - -//! 
An cache of predecessors that supports requests and reservations -template< typename T, typename M=spin_mutex > -class reservable_predecessor_cache : public predecessor_cache< T, M > { -public: - typedef M mutex_type; - typedef T output_type; - typedef sender<T> predecessor_type; - typedef receiver<T> successor_type; - - reservable_predecessor_cache( successor_type* owner ) - : predecessor_cache<T,M>(owner), reserved_src(NULL) - { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - bool - try_reserve( output_type &v ) { - bool msg = false; - - do { - { - typename mutex_type::scoped_lock lock(this->my_mutex); - if ( reserved_src || this->internal_empty() ) - return false; - - reserved_src = &this->internal_pop(); - } - - // Try to get from this sender - msg = reserved_src->try_reserve( v ); - - if (msg == false) { - typename mutex_type::scoped_lock lock(this->my_mutex); - // Relinquish ownership of the edge - register_successor( *reserved_src, *this->my_owner ); - reserved_src = NULL; - } else { - // Retain ownership of the edge - this->add( *reserved_src ); - } - } while ( msg == false ); - - return msg; - } - - bool - try_release( ) { - reserved_src->try_release( ); - reserved_src = NULL; - return true; - } - - bool - try_consume( ) { - reserved_src->try_consume( ); - reserved_src = NULL; - return true; - } - - void reset( ) { - reserved_src = NULL; - predecessor_cache<T,M>::reset( ); - } - - void clear() { - reserved_src = NULL; - predecessor_cache<T,M>::clear(); - } - -private: - predecessor_type *reserved_src; -}; - - -//! An abstract cache of successors -template<typename T, typename M=spin_rw_mutex > -class successor_cache : no_copy { -protected: - - typedef M mutex_type; - mutex_type my_mutex; - - typedef receiver<T> successor_type; - typedef receiver<T>* pointer_type; - typedef sender<T> owner_type; - // TODO revamp: introduce heapified collection of successors for strict priorities - typedef std::list< pointer_type > successors_type; - successors_type my_successors; - - owner_type* my_owner; - -public: - successor_cache( owner_type* owner ) : my_owner(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - virtual ~successor_cache() {} - - void register_successor( successor_type& r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - if( r.priority() != no_priority ) - my_successors.push_front( &r ); - else - my_successors.push_back( &r ); - } - - void remove_successor( successor_type& r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - for ( typename successors_type::iterator i = my_successors.begin(); - i != my_successors.end(); ++i ) { - if ( *i == & r ) { - my_successors.erase(i); - break; - } - } - } - - bool empty() { - typename mutex_type::scoped_lock l(my_mutex, false); - return my_successors.empty(); - } - - void clear() { - my_successors.clear(); - } - - virtual graph_task* try_put_task( const T& t ) = 0; -}; // successor_cache<T> - -//! 
An abstract cache of successors, specialized to continue_msg -template<typename M> -class successor_cache< continue_msg, M > : no_copy { -protected: - - typedef M mutex_type; - mutex_type my_mutex; - - typedef receiver<continue_msg> successor_type; - typedef receiver<continue_msg>* pointer_type; - typedef sender<continue_msg> owner_type; - typedef std::list< pointer_type > successors_type; - successors_type my_successors; - owner_type* my_owner; - -public: - successor_cache( sender<continue_msg>* owner ) : my_owner(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - virtual ~successor_cache() {} - - void register_successor( successor_type& r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - if( r.priority() != no_priority ) - my_successors.push_front( &r ); - else - my_successors.push_back( &r ); - __TBB_ASSERT( my_owner, "Cache of successors must have an owner." ); - if ( r.is_continue_receiver() ) { - r.register_predecessor( *my_owner ); - } - } - - void remove_successor( successor_type& r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - for ( successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { - if ( *i == &r ) { - __TBB_ASSERT(my_owner, "Cache of successors must have an owner."); - // TODO: check if we need to test for continue_receiver before removing from r. - r.remove_predecessor( *my_owner ); - my_successors.erase(i); - break; - } - } - } - - bool empty() { - typename mutex_type::scoped_lock l(my_mutex, false); - return my_successors.empty(); - } - - void clear() { - my_successors.clear(); - } - - virtual graph_task* try_put_task( const continue_msg& t ) = 0; -}; // successor_cache< continue_msg > - -//! A cache of successors that are broadcast to -template<typename T, typename M=spin_rw_mutex> -class broadcast_cache : public successor_cache<T, M> { - typedef successor_cache<T, M> base_type; - typedef M mutex_type; - typedef typename successor_cache<T,M>::successors_type successors_type; - -public: - - broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - // as above, but call try_put_task instead, and return the last task we received (if any) - graph_task* try_put_task( const T &t ) override { - graph_task * last_task = nullptr; - typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); - typename successors_type::iterator i = this->my_successors.begin(); - while ( i != this->my_successors.end() ) { - graph_task *new_task = (*i)->try_put_task(t); - // workaround for icc bug - graph& graph_ref = (*i)->graph_reference(); - last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary - if(new_task) { - ++i; - } - else { // failed - if ( (*i)->register_predecessor(*this->my_owner) ) { - i = this->my_successors.erase(i); - } else { - ++i; - } - } - } - return last_task; - } - - // call try_put_task and return list of received tasks - bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) { - bool is_at_least_one_put_successful = false; - typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); - typename successors_type::iterator i = this->my_successors.begin(); - while ( i != this->my_successors.end() ) { - graph_task * new_task = (*i)->try_put_task(t); - if(new_task) { - ++i; - if(new_task != SUCCESSFULLY_ENQUEUED) { - tasks.push_back(*new_task); - } - is_at_least_one_put_successful = true; - } - else { // 
failed - if ( (*i)->register_predecessor(*this->my_owner) ) { - i = this->my_successors.erase(i); - } else { - ++i; - } - } - } - return is_at_least_one_put_successful; - } -}; - -//! A cache of successors that are put in a round-robin fashion -template<typename T, typename M=spin_rw_mutex > -class round_robin_cache : public successor_cache<T, M> { - typedef successor_cache<T, M> base_type; - typedef size_t size_type; - typedef M mutex_type; - typedef typename successor_cache<T,M>::successors_type successors_type; - -public: - - round_robin_cache( typename base_type::owner_type* owner ): base_type(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - size_type size() { - typename mutex_type::scoped_lock l(this->my_mutex, false); - return this->my_successors.size(); - } - - graph_task* try_put_task( const T &t ) override { - typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); - typename successors_type::iterator i = this->my_successors.begin(); - while ( i != this->my_successors.end() ) { - graph_task* new_task = (*i)->try_put_task(t); - if ( new_task ) { - return new_task; - } else { - if ( (*i)->register_predecessor(*this->my_owner) ) { - i = this->my_successors.erase(i); - } - else { - ++i; - } - } - } - return NULL; - } -}; - -#endif // __TBB__flow_graph_cache_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_cache_impl_H +#define __TBB__flow_graph_cache_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 (in flow_graph.h) + +//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock. +template< typename T, typename M=spin_mutex > +class node_cache { + public: + + typedef size_t size_type; + + bool empty() { + typename mutex_type::scoped_lock lock( my_mutex ); + return internal_empty(); + } + + void add( T &n ) { + typename mutex_type::scoped_lock lock( my_mutex ); + internal_push(n); + } + + void remove( T &n ) { + typename mutex_type::scoped_lock lock( my_mutex ); + for ( size_t i = internal_size(); i != 0; --i ) { + T &s = internal_pop(); + if ( &s == &n ) + break; // only remove one predecessor per request + internal_push(s); + } + } + + void clear() { + while( !my_q.empty()) (void)my_q.pop(); + } + +protected: + + typedef M mutex_type; + mutex_type my_mutex; + std::queue< T * > my_q; + + // Assumes lock is held + inline bool internal_empty( ) { + return my_q.empty(); + } + + // Assumes lock is held + inline size_type internal_size( ) { + return my_q.size(); + } + + // Assumes lock is held + inline void internal_push( T &n ) { + my_q.push(&n); + } + + // Assumes lock is held + inline T &internal_pop() { + T *v = my_q.front(); + my_q.pop(); + return *v; + } + +}; + +//! 
A cache of predecessors that only supports try_get +template< typename T, typename M=spin_mutex > +class predecessor_cache : public node_cache< sender<T>, M > { +public: + typedef M mutex_type; + typedef T output_type; + typedef sender<output_type> predecessor_type; + typedef receiver<output_type> successor_type; + + predecessor_cache( successor_type* owner ) : my_owner( owner ) { + __TBB_ASSERT( my_owner, "predecessor_cache should have an owner." ); + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + bool get_item( output_type& v ) { + + bool msg = false; + + do { + predecessor_type *src; + { + typename mutex_type::scoped_lock lock(this->my_mutex); + if ( this->internal_empty() ) { + break; + } + src = &this->internal_pop(); + } + + // Try to get from this sender + msg = src->try_get( v ); + + if (msg == false) { + // Relinquish ownership of the edge + register_successor(*src, *my_owner); + } else { + // Retain ownership of the edge + this->add(*src); + } + } while ( msg == false ); + return msg; + } + + // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). + void reset() { + for(;;) { + predecessor_type *src; + { + if (this->internal_empty()) break; + src = &this->internal_pop(); + } + register_successor(*src, *my_owner); + } + } + +protected: + successor_type* my_owner; +}; + +//! An cache of predecessors that supports requests and reservations +template< typename T, typename M=spin_mutex > +class reservable_predecessor_cache : public predecessor_cache< T, M > { +public: + typedef M mutex_type; + typedef T output_type; + typedef sender<T> predecessor_type; + typedef receiver<T> successor_type; + + reservable_predecessor_cache( successor_type* owner ) + : predecessor_cache<T,M>(owner), reserved_src(NULL) + { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + bool + try_reserve( output_type &v ) { + bool msg = false; + + do { + { + typename mutex_type::scoped_lock lock(this->my_mutex); + if ( reserved_src || this->internal_empty() ) + return false; + + reserved_src = &this->internal_pop(); + } + + // Try to get from this sender + msg = reserved_src->try_reserve( v ); + + if (msg == false) { + typename mutex_type::scoped_lock lock(this->my_mutex); + // Relinquish ownership of the edge + register_successor( *reserved_src, *this->my_owner ); + reserved_src = NULL; + } else { + // Retain ownership of the edge + this->add( *reserved_src ); + } + } while ( msg == false ); + + return msg; + } + + bool + try_release( ) { + reserved_src->try_release( ); + reserved_src = NULL; + return true; + } + + bool + try_consume( ) { + reserved_src->try_consume( ); + reserved_src = NULL; + return true; + } + + void reset( ) { + reserved_src = NULL; + predecessor_cache<T,M>::reset( ); + } + + void clear() { + reserved_src = NULL; + predecessor_cache<T,M>::clear(); + } + +private: + predecessor_type *reserved_src; +}; + + +//! 
An abstract cache of successors +template<typename T, typename M=spin_rw_mutex > +class successor_cache : no_copy { +protected: + + typedef M mutex_type; + mutex_type my_mutex; + + typedef receiver<T> successor_type; + typedef receiver<T>* pointer_type; + typedef sender<T> owner_type; + // TODO revamp: introduce heapified collection of successors for strict priorities + typedef std::list< pointer_type > successors_type; + successors_type my_successors; + + owner_type* my_owner; + +public: + successor_cache( owner_type* owner ) : my_owner(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + virtual ~successor_cache() {} + + void register_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + if( r.priority() != no_priority ) + my_successors.push_front( &r ); + else + my_successors.push_back( &r ); + } + + void remove_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + for ( typename successors_type::iterator i = my_successors.begin(); + i != my_successors.end(); ++i ) { + if ( *i == & r ) { + my_successors.erase(i); + break; + } + } + } + + bool empty() { + typename mutex_type::scoped_lock l(my_mutex, false); + return my_successors.empty(); + } + + void clear() { + my_successors.clear(); + } + + virtual graph_task* try_put_task( const T& t ) = 0; +}; // successor_cache<T> + +//! An abstract cache of successors, specialized to continue_msg +template<typename M> +class successor_cache< continue_msg, M > : no_copy { +protected: + + typedef M mutex_type; + mutex_type my_mutex; + + typedef receiver<continue_msg> successor_type; + typedef receiver<continue_msg>* pointer_type; + typedef sender<continue_msg> owner_type; + typedef std::list< pointer_type > successors_type; + successors_type my_successors; + owner_type* my_owner; + +public: + successor_cache( sender<continue_msg>* owner ) : my_owner(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + virtual ~successor_cache() {} + + void register_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + if( r.priority() != no_priority ) + my_successors.push_front( &r ); + else + my_successors.push_back( &r ); + __TBB_ASSERT( my_owner, "Cache of successors must have an owner." ); + if ( r.is_continue_receiver() ) { + r.register_predecessor( *my_owner ); + } + } + + void remove_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + for ( successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { + if ( *i == &r ) { + __TBB_ASSERT(my_owner, "Cache of successors must have an owner."); + // TODO: check if we need to test for continue_receiver before removing from r. + r.remove_predecessor( *my_owner ); + my_successors.erase(i); + break; + } + } + } + + bool empty() { + typename mutex_type::scoped_lock l(my_mutex, false); + return my_successors.empty(); + } + + void clear() { + my_successors.clear(); + } + + virtual graph_task* try_put_task( const continue_msg& t ) = 0; +}; // successor_cache< continue_msg > + +//! 
A cache of successors that are broadcast to +template<typename T, typename M=spin_rw_mutex> +class broadcast_cache : public successor_cache<T, M> { + typedef successor_cache<T, M> base_type; + typedef M mutex_type; + typedef typename successor_cache<T,M>::successors_type successors_type; + +public: + + broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + // as above, but call try_put_task instead, and return the last task we received (if any) + graph_task* try_put_task( const T &t ) override { + graph_task * last_task = nullptr; + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task *new_task = (*i)->try_put_task(t); + // workaround for icc bug + graph& graph_ref = (*i)->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary + if(new_task) { + ++i; + } + else { // failed + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } else { + ++i; + } + } + } + return last_task; + } + + // call try_put_task and return list of received tasks + bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) { + bool is_at_least_one_put_successful = false; + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task * new_task = (*i)->try_put_task(t); + if(new_task) { + ++i; + if(new_task != SUCCESSFULLY_ENQUEUED) { + tasks.push_back(*new_task); + } + is_at_least_one_put_successful = true; + } + else { // failed + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } else { + ++i; + } + } + } + return is_at_least_one_put_successful; + } +}; + +//! 
A cache of successors that are put in a round-robin fashion +template<typename T, typename M=spin_rw_mutex > +class round_robin_cache : public successor_cache<T, M> { + typedef successor_cache<T, M> base_type; + typedef size_t size_type; + typedef M mutex_type; + typedef typename successor_cache<T,M>::successors_type successors_type; + +public: + + round_robin_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + size_type size() { + typename mutex_type::scoped_lock l(this->my_mutex, false); + return this->my_successors.size(); + } + + graph_task* try_put_task( const T &t ) override { + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task* new_task = (*i)->try_put_task(t); + if ( new_task ) { + return new_task; + } else { + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } + else { + ++i; + } + } + } + return NULL; + } +}; + +#endif // __TBB__flow_graph_cache_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h index a3d17cfb1c..1c4e6c8917 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h @@ -1,488 +1,488 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_flow_graph_impl_H -#define __TBB_flow_graph_impl_H - -// #include "../config.h" -#include "_task.h" -#include "tbb/task_group.h" -#include "../task_arena.h" -#include "../flow_graph_abstractions.h" - -#include "../concurrent_priority_queue.h" - -#include <list> - -namespace tbb { -namespace detail { - -namespace d1 { - -class graph_task; -static graph_task* const SUCCESSFULLY_ENQUEUED = (graph_task*)-1; -typedef unsigned int node_priority_t; -static const node_priority_t no_priority = node_priority_t(0); - -class graph; -class graph_node; - -template <typename GraphContainerType, typename GraphNodeType> -class graph_iterator { - friend class graph; - friend class graph_node; -public: - typedef size_t size_type; - typedef GraphNodeType value_type; - typedef GraphNodeType* pointer; - typedef GraphNodeType& reference; - typedef const GraphNodeType& const_reference; - typedef std::forward_iterator_tag iterator_category; - - //! Copy constructor - graph_iterator(const graph_iterator& other) : - my_graph(other.my_graph), current_node(other.current_node) - {} - - //! Assignment - graph_iterator& operator=(const graph_iterator& other) { - if (this != &other) { - my_graph = other.my_graph; - current_node = other.current_node; - } - return *this; - } - - //! Dereference - reference operator*() const; - - //! Dereference - pointer operator->() const; - - //! 
Equality - bool operator==(const graph_iterator& other) const { - return ((my_graph == other.my_graph) && (current_node == other.current_node)); - } - -#if !__TBB_CPP20_COMPARISONS_PRESENT - //! Inequality - bool operator!=(const graph_iterator& other) const { return !(operator==(other)); } -#endif - - //! Pre-increment - graph_iterator& operator++() { - internal_forward(); - return *this; - } - - //! Post-increment - graph_iterator operator++(int) { - graph_iterator result = *this; - operator++(); - return result; - } - -private: - // the graph over which we are iterating - GraphContainerType *my_graph; - // pointer into my_graph's my_nodes list - pointer current_node; - - //! Private initializing constructor for begin() and end() iterators - graph_iterator(GraphContainerType *g, bool begin); - void internal_forward(); -}; // class graph_iterator - -// flags to modify the behavior of the graph reset(). Can be combined. -enum reset_flags { - rf_reset_protocol = 0, - rf_reset_bodies = 1 << 0, // delete the current node body, reset to a copy of the initial node body. - rf_clear_edges = 1 << 1 // delete edges -}; - -void activate_graph(graph& g); -void deactivate_graph(graph& g); -bool is_graph_active(graph& g); -graph_task* prioritize_task(graph& g, graph_task& arena_task); -void spawn_in_graph_arena(graph& g, graph_task& arena_task); -void enqueue_in_graph_arena(graph &g, graph_task& arena_task); - -class graph; - -//! Base class for tasks generated by graph nodes. -class graph_task : public task { -public: - graph_task(graph& g, small_object_allocator& allocator - , node_priority_t node_priority = no_priority - ) - : my_graph(g) - , priority(node_priority) - , my_allocator(allocator) - {} - graph& my_graph; // graph instance the task belongs to - // TODO revamp: rename to my_priority - node_priority_t priority; - void destruct_and_deallocate(const execution_data& ed); - task* cancel(execution_data& ed) override; -protected: - void finalize(const execution_data& ed); -private: - // To organize task_list - graph_task* my_next{ nullptr }; - small_object_allocator my_allocator; - // TODO revamp: elaborate internal interfaces to avoid friends declarations - friend class graph_task_list; - friend graph_task* prioritize_task(graph& g, graph_task& gt); -}; - -struct graph_task_comparator { - bool operator()(const graph_task* left, const graph_task* right) { - return left->priority < right->priority; - } -}; - -typedef tbb::concurrent_priority_queue<graph_task*, graph_task_comparator> graph_task_priority_queue_t; - -class priority_task_selector : public task { -public: - priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator) - : my_priority_queue(priority_queue), my_allocator(allocator), my_task() {} - task* execute(execution_data& ed) override { - next_task(); - __TBB_ASSERT(my_task, nullptr); - task* t_next = my_task->execute(ed); - my_allocator.delete_object(this, ed); - return t_next; - } - task* cancel(execution_data& ed) override { - if (!my_task) { - next_task(); - } - __TBB_ASSERT(my_task, nullptr); - task* t_next = my_task->cancel(ed); - my_allocator.delete_object(this, ed); - return t_next; - } -private: - void next_task() { - // TODO revamp: hold functors in priority queue instead of real tasks - bool result = my_priority_queue.try_pop(my_task); - __TBB_ASSERT_EX(result, "Number of critical tasks for scheduler and tasks" - " in graph's priority queue mismatched"); - __TBB_ASSERT(my_task && my_task != SUCCESSFULLY_ENQUEUED, - "Incorrect task 
submitted to graph priority queue"); - __TBB_ASSERT(my_task->priority != no_priority, - "Tasks from graph's priority queue must have priority"); - } - - graph_task_priority_queue_t& my_priority_queue; - small_object_allocator my_allocator; - graph_task* my_task; -}; - -template <typename Receiver, typename Body> class run_and_put_task; -template <typename Body> class run_task; - -//******************************************************************************** -// graph tasks helpers -//******************************************************************************** - -//! The list of graph tasks -class graph_task_list : no_copy { -private: - graph_task* my_first; - graph_task** my_next_ptr; -public: - //! Construct empty list - graph_task_list() : my_first(nullptr), my_next_ptr(&my_first) {} - - //! True if list is empty; false otherwise. - bool empty() const { return !my_first; } - - //! Push task onto back of list. - void push_back(graph_task& task) { - task.my_next = nullptr; - *my_next_ptr = &task; - my_next_ptr = &task.my_next; - } - - //! Pop the front task from the list. - graph_task& pop_front() { - __TBB_ASSERT(!empty(), "attempt to pop item from empty task_list"); - graph_task* result = my_first; - my_first = result->my_next; - if (!my_first) { - my_next_ptr = &my_first; - } - return *result; - } -}; - -//! The graph class -/** This class serves as a handle to the graph */ -class graph : no_copy, public graph_proxy { - friend class graph_node; - - void prepare_task_arena(bool reinit = false) { - if (reinit) { - __TBB_ASSERT(my_task_arena, "task arena is NULL"); - my_task_arena->terminate(); - my_task_arena->initialize(task_arena::attach()); - } - else { - __TBB_ASSERT(my_task_arena == NULL, "task arena is not NULL"); - my_task_arena = new task_arena(task_arena::attach()); - } - if (!my_task_arena->is_active()) // failed to attach - my_task_arena->initialize(); // create a new, default-initialized arena - __TBB_ASSERT(my_task_arena->is_active(), "task arena is not active"); - } - -public: - //! Constructs a graph with isolated task_group_context - graph(); - - //! Constructs a graph with use_this_context as context - explicit graph(task_group_context& use_this_context); - - //! Destroys the graph. - /** Calls wait_for_all, then destroys the root task and context. */ - ~graph(); - - //! Used to register that an external entity may still interact with the graph. - /** The graph will not return from wait_for_all until a matching number of release_wait calls is - made. */ - void reserve_wait() override; - - //! Deregisters an external entity that may have interacted with the graph. - /** The graph will not return from wait_for_all until all the number of reserve_wait calls - matches the number of release_wait calls. */ - void release_wait() override; - - //! Wait until graph is idle and the number of release_wait calls equals to the number of - //! reserve_wait calls. - /** The waiting thread will go off and steal work while it is blocked in the wait_for_all. */ - void wait_for_all() { - cancelled = false; - caught_exception = false; - try_call([this] { - my_task_arena->execute([this] { - wait(my_wait_context, *my_context); - }); - cancelled = my_context->is_group_execution_cancelled(); - }).on_exception([this] { - my_context->reset(); - caught_exception = true; - cancelled = true; - }); - // TODO: the "if" condition below is just a work-around to support the concurrent wait - // mode. The cancellation and exception mechanisms are still broken in this mode. 
- // Consider using task group not to re-implement the same functionality. - if (!(my_context->traits() & task_group_context::concurrent_wait)) { - my_context->reset(); // consistent with behavior in catch() - } - } - -#if TODO_REVAMP -#error Decide on ref_count() presence. - Its only use is in the template<typename T, typename BufferType> void test_resets() -#endif - -#if __TBB_EXTRA_DEBUG - unsigned ref_count() const { return my_wait_context.reference_count(); } -#endif - - - // TODO revamp: consider adding getter for task_group_context. - - // ITERATORS - template<typename C, typename N> - friend class graph_iterator; - - // Graph iterator typedefs - typedef graph_iterator<graph, graph_node> iterator; - typedef graph_iterator<const graph, const graph_node> const_iterator; - - // Graph iterator constructors - //! start iterator - iterator begin(); - //! end iterator - iterator end(); - //! start const iterator - const_iterator begin() const; - //! end const iterator - const_iterator end() const; - //! start const iterator - const_iterator cbegin() const; - //! end const iterator - const_iterator cend() const; - - // thread-unsafe state reset. - void reset(reset_flags f = rf_reset_protocol); - - //! cancels execution of the associated task_group_context - void cancel(); - - //! return status of graph execution - bool is_cancelled() { return cancelled; } - bool exception_thrown() { return caught_exception; } - -private: - wait_context my_wait_context; - task_group_context *my_context; - bool own_context; - bool cancelled; - bool caught_exception; - bool my_is_active; - - graph_node *my_nodes, *my_nodes_last; - - tbb::spin_mutex nodelist_mutex; - void register_node(graph_node *n); - void remove_node(graph_node *n); - - task_arena* my_task_arena; - - graph_task_priority_queue_t my_priority_queue; - - friend void activate_graph(graph& g); - friend void deactivate_graph(graph& g); - friend bool is_graph_active(graph& g); - friend graph_task* prioritize_task(graph& g, graph_task& arena_task); - friend void spawn_in_graph_arena(graph& g, graph_task& arena_task); - friend void enqueue_in_graph_arena(graph &g, graph_task& arena_task); - - friend class task_arena_base; - -}; // class graph - -inline void graph_task::destruct_and_deallocate(const execution_data& ed) { - auto allocator = my_allocator; - // TODO: investigate if direct call of derived destructor gives any benefits. - this->~graph_task(); - allocator.deallocate(this, ed); -} - -inline void graph_task::finalize(const execution_data& ed) { - graph& g = my_graph; - destruct_and_deallocate(ed); - g.release_wait(); -} - -inline task* graph_task::cancel(execution_data& ed) { - finalize(ed); - return nullptr; -} - -//******************************************************************************** -// end of graph tasks helpers -//******************************************************************************** - - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -class get_graph_helper; -#endif - -//! The base of all graph nodes. -class graph_node : no_copy { - friend class graph; - template<typename C, typename N> - friend class graph_iterator; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - friend class get_graph_helper; -#endif - -protected: - graph& my_graph; - graph& graph_reference() const { - // TODO revamp: propagate graph_reference() method to all the reference places. 
- return my_graph; - } - graph_node* next = nullptr; - graph_node* prev = nullptr; -public: - explicit graph_node(graph& g); - - virtual ~graph_node(); - -protected: - // performs the reset on an individual node. - virtual void reset_node(reset_flags f = rf_reset_protocol) = 0; -}; // class graph_node - -inline void activate_graph(graph& g) { - g.my_is_active = true; -} - -inline void deactivate_graph(graph& g) { - g.my_is_active = false; -} - -inline bool is_graph_active(graph& g) { - return g.my_is_active; -} - -inline graph_task* prioritize_task(graph& g, graph_task& gt) { - if( no_priority == gt.priority ) - return &gt; - - //! Non-preemptive priority pattern. The original task is submitted as a work item to the - //! priority queue, and a new critical task is created to take and execute a work item with - //! the highest known priority. The reference counting responsibility is transferred (via - //! allocate_continuation) to the new task. - task* critical_task = gt.my_allocator.new_object<priority_task_selector>(g.my_priority_queue, gt.my_allocator); - __TBB_ASSERT( critical_task, "bad_alloc?" ); - g.my_priority_queue.push(&gt); - using tbb::detail::d1::submit; - submit( *critical_task, *g.my_task_arena, *g.my_context, /*as_critical=*/true ); - return nullptr; -} - -//! Spawns a task inside graph arena -inline void spawn_in_graph_arena(graph& g, graph_task& arena_task) { - if (is_graph_active(g)) { - task* gt = prioritize_task(g, arena_task); - if( !gt ) - return; - - __TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), NULL); - submit( *gt, *g.my_task_arena, *g.my_context -#if __TBB_PREVIEW_CRITICAL_TASKS - , /*as_critical=*/false -#endif - ); - } -} - -// TODO revamp: unify *_in_graph_arena functions - -//! Enqueues a task inside graph arena -inline void enqueue_in_graph_arena(graph &g, graph_task& arena_task) { - if (is_graph_active(g)) { - __TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" ); - - // TODO revamp: decide on the approach that does not postpone critical task - if( task* gt = prioritize_task(g, arena_task) ) - submit( *gt, *g.my_task_arena, *g.my_context, /*as_critical=*/false); - } -} - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_flow_graph_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_flow_graph_impl_H +#define __TBB_flow_graph_impl_H + +// #include "../config.h" +#include "_task.h" +#include "tbb/task_group.h" +#include "../task_arena.h" +#include "../flow_graph_abstractions.h" + +#include "../concurrent_priority_queue.h" + +#include <list> + +namespace tbb { +namespace detail { + +namespace d1 { + +class graph_task; +static graph_task* const SUCCESSFULLY_ENQUEUED = (graph_task*)-1; +typedef unsigned int node_priority_t; +static const node_priority_t no_priority = node_priority_t(0); + +class graph; +class graph_node; + +template <typename GraphContainerType, typename GraphNodeType> +class graph_iterator { + friend class graph; + friend class graph_node; +public: + typedef size_t size_type; + typedef GraphNodeType value_type; + typedef GraphNodeType* pointer; + typedef GraphNodeType& reference; + typedef const GraphNodeType& const_reference; + typedef std::forward_iterator_tag iterator_category; + + //! Copy constructor + graph_iterator(const graph_iterator& other) : + my_graph(other.my_graph), current_node(other.current_node) + {} + + //! Assignment + graph_iterator& operator=(const graph_iterator& other) { + if (this != &other) { + my_graph = other.my_graph; + current_node = other.current_node; + } + return *this; + } + + //! Dereference + reference operator*() const; + + //! Dereference + pointer operator->() const; + + //! Equality + bool operator==(const graph_iterator& other) const { + return ((my_graph == other.my_graph) && (current_node == other.current_node)); + } + +#if !__TBB_CPP20_COMPARISONS_PRESENT + //! Inequality + bool operator!=(const graph_iterator& other) const { return !(operator==(other)); } +#endif + + //! Pre-increment + graph_iterator& operator++() { + internal_forward(); + return *this; + } + + //! Post-increment + graph_iterator operator++(int) { + graph_iterator result = *this; + operator++(); + return result; + } + +private: + // the graph over which we are iterating + GraphContainerType *my_graph; + // pointer into my_graph's my_nodes list + pointer current_node; + + //! Private initializing constructor for begin() and end() iterators + graph_iterator(GraphContainerType *g, bool begin); + void internal_forward(); +}; // class graph_iterator + +// flags to modify the behavior of the graph reset(). Can be combined. +enum reset_flags { + rf_reset_protocol = 0, + rf_reset_bodies = 1 << 0, // delete the current node body, reset to a copy of the initial node body. + rf_clear_edges = 1 << 1 // delete edges +}; + +void activate_graph(graph& g); +void deactivate_graph(graph& g); +bool is_graph_active(graph& g); +graph_task* prioritize_task(graph& g, graph_task& arena_task); +void spawn_in_graph_arena(graph& g, graph_task& arena_task); +void enqueue_in_graph_arena(graph &g, graph_task& arena_task); + +class graph; + +//! Base class for tasks generated by graph nodes. 
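// Illustrative sketch (not part of this header): how a node typically creates and launches such a
// task is only implied here, so the commented lines below piece the protocol together from calls
// that do appear in this file (small_object_allocator::new_object, reserve_wait,
// spawn_in_graph_arena, and the apply_body_task_bypass constructor). The names my_node_type,
// input_type, my_graph, input and my_priority are placeholders, and the exact sequence in the real
// node implementations may differ:
//
//     small_object_allocator allocator{};
//     typedef apply_body_task_bypass<my_node_type, input_type> task_type;
//     graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this, input, my_priority);
//     my_graph.reserve_wait();             // balanced by release_wait() inside graph_task::finalize()
//     spawn_in_graph_arena(my_graph, *t);  // prioritized if needed, then submitted in the graph's arena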
+class graph_task : public task { +public: + graph_task(graph& g, small_object_allocator& allocator + , node_priority_t node_priority = no_priority + ) + : my_graph(g) + , priority(node_priority) + , my_allocator(allocator) + {} + graph& my_graph; // graph instance the task belongs to + // TODO revamp: rename to my_priority + node_priority_t priority; + void destruct_and_deallocate(const execution_data& ed); + task* cancel(execution_data& ed) override; +protected: + void finalize(const execution_data& ed); +private: + // To organize task_list + graph_task* my_next{ nullptr }; + small_object_allocator my_allocator; + // TODO revamp: elaborate internal interfaces to avoid friends declarations + friend class graph_task_list; + friend graph_task* prioritize_task(graph& g, graph_task& gt); +}; + +struct graph_task_comparator { + bool operator()(const graph_task* left, const graph_task* right) { + return left->priority < right->priority; + } +}; + +typedef tbb::concurrent_priority_queue<graph_task*, graph_task_comparator> graph_task_priority_queue_t; + +class priority_task_selector : public task { +public: + priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator) + : my_priority_queue(priority_queue), my_allocator(allocator), my_task() {} + task* execute(execution_data& ed) override { + next_task(); + __TBB_ASSERT(my_task, nullptr); + task* t_next = my_task->execute(ed); + my_allocator.delete_object(this, ed); + return t_next; + } + task* cancel(execution_data& ed) override { + if (!my_task) { + next_task(); + } + __TBB_ASSERT(my_task, nullptr); + task* t_next = my_task->cancel(ed); + my_allocator.delete_object(this, ed); + return t_next; + } +private: + void next_task() { + // TODO revamp: hold functors in priority queue instead of real tasks + bool result = my_priority_queue.try_pop(my_task); + __TBB_ASSERT_EX(result, "Number of critical tasks for scheduler and tasks" + " in graph's priority queue mismatched"); + __TBB_ASSERT(my_task && my_task != SUCCESSFULLY_ENQUEUED, + "Incorrect task submitted to graph priority queue"); + __TBB_ASSERT(my_task->priority != no_priority, + "Tasks from graph's priority queue must have priority"); + } + + graph_task_priority_queue_t& my_priority_queue; + small_object_allocator my_allocator; + graph_task* my_task; +}; + +template <typename Receiver, typename Body> class run_and_put_task; +template <typename Body> class run_task; + +//******************************************************************************** +// graph tasks helpers +//******************************************************************************** + +//! The list of graph tasks +class graph_task_list : no_copy { +private: + graph_task* my_first; + graph_task** my_next_ptr; +public: + //! Construct empty list + graph_task_list() : my_first(nullptr), my_next_ptr(&my_first) {} + + //! True if list is empty; false otherwise. + bool empty() const { return !my_first; } + + //! Push task onto back of list. + void push_back(graph_task& task) { + task.my_next = nullptr; + *my_next_ptr = &task; + my_next_ptr = &task.my_next; + } + + //! Pop the front task from the list. + graph_task& pop_front() { + __TBB_ASSERT(!empty(), "attempt to pop item from empty task_list"); + graph_task* result = my_first; + my_first = result->my_next; + if (!my_first) { + my_next_ptr = &my_first; + } + return *result; + } +}; + +//! 
The graph class +/** This class serves as a handle to the graph */ +class graph : no_copy, public graph_proxy { + friend class graph_node; + + void prepare_task_arena(bool reinit = false) { + if (reinit) { + __TBB_ASSERT(my_task_arena, "task arena is NULL"); + my_task_arena->terminate(); + my_task_arena->initialize(task_arena::attach()); + } + else { + __TBB_ASSERT(my_task_arena == NULL, "task arena is not NULL"); + my_task_arena = new task_arena(task_arena::attach()); + } + if (!my_task_arena->is_active()) // failed to attach + my_task_arena->initialize(); // create a new, default-initialized arena + __TBB_ASSERT(my_task_arena->is_active(), "task arena is not active"); + } + +public: + //! Constructs a graph with isolated task_group_context + graph(); + + //! Constructs a graph with use_this_context as context + explicit graph(task_group_context& use_this_context); + + //! Destroys the graph. + /** Calls wait_for_all, then destroys the root task and context. */ + ~graph(); + + //! Used to register that an external entity may still interact with the graph. + /** The graph will not return from wait_for_all until a matching number of release_wait calls is + made. */ + void reserve_wait() override; + + //! Deregisters an external entity that may have interacted with the graph. + /** The graph will not return from wait_for_all until all the number of reserve_wait calls + matches the number of release_wait calls. */ + void release_wait() override; + + //! Wait until graph is idle and the number of release_wait calls equals to the number of + //! reserve_wait calls. + /** The waiting thread will go off and steal work while it is blocked in the wait_for_all. */ + void wait_for_all() { + cancelled = false; + caught_exception = false; + try_call([this] { + my_task_arena->execute([this] { + wait(my_wait_context, *my_context); + }); + cancelled = my_context->is_group_execution_cancelled(); + }).on_exception([this] { + my_context->reset(); + caught_exception = true; + cancelled = true; + }); + // TODO: the "if" condition below is just a work-around to support the concurrent wait + // mode. The cancellation and exception mechanisms are still broken in this mode. + // Consider using task group not to re-implement the same functionality. + if (!(my_context->traits() & task_group_context::concurrent_wait)) { + my_context->reset(); // consistent with behavior in catch() + } + } + +#if TODO_REVAMP +#error Decide on ref_count() presence. + Its only use is in the template<typename T, typename BufferType> void test_resets() +#endif + +#if __TBB_EXTRA_DEBUG + unsigned ref_count() const { return my_wait_context.reference_count(); } +#endif + + + // TODO revamp: consider adding getter for task_group_context. + + // ITERATORS + template<typename C, typename N> + friend class graph_iterator; + + // Graph iterator typedefs + typedef graph_iterator<graph, graph_node> iterator; + typedef graph_iterator<const graph, const graph_node> const_iterator; + + // Graph iterator constructors + //! start iterator + iterator begin(); + //! end iterator + iterator end(); + //! start const iterator + const_iterator begin() const; + //! end const iterator + const_iterator end() const; + //! start const iterator + const_iterator cbegin() const; + //! end const iterator + const_iterator cend() const; + + // thread-unsafe state reset. + void reset(reset_flags f = rf_reset_protocol); + + //! cancels execution of the associated task_group_context + void cancel(); + + //! 
return status of graph execution + bool is_cancelled() { return cancelled; } + bool exception_thrown() { return caught_exception; } + +private: + wait_context my_wait_context; + task_group_context *my_context; + bool own_context; + bool cancelled; + bool caught_exception; + bool my_is_active; + + graph_node *my_nodes, *my_nodes_last; + + tbb::spin_mutex nodelist_mutex; + void register_node(graph_node *n); + void remove_node(graph_node *n); + + task_arena* my_task_arena; + + graph_task_priority_queue_t my_priority_queue; + + friend void activate_graph(graph& g); + friend void deactivate_graph(graph& g); + friend bool is_graph_active(graph& g); + friend graph_task* prioritize_task(graph& g, graph_task& arena_task); + friend void spawn_in_graph_arena(graph& g, graph_task& arena_task); + friend void enqueue_in_graph_arena(graph &g, graph_task& arena_task); + + friend class task_arena_base; + +}; // class graph + +inline void graph_task::destruct_and_deallocate(const execution_data& ed) { + auto allocator = my_allocator; + // TODO: investigate if direct call of derived destructor gives any benefits. + this->~graph_task(); + allocator.deallocate(this, ed); +} + +inline void graph_task::finalize(const execution_data& ed) { + graph& g = my_graph; + destruct_and_deallocate(ed); + g.release_wait(); +} + +inline task* graph_task::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//******************************************************************************** +// end of graph tasks helpers +//******************************************************************************** + + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +class get_graph_helper; +#endif + +//! The base of all graph nodes. +class graph_node : no_copy { + friend class graph; + template<typename C, typename N> + friend class graph_iterator; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + +protected: + graph& my_graph; + graph& graph_reference() const { + // TODO revamp: propagate graph_reference() method to all the reference places. + return my_graph; + } + graph_node* next = nullptr; + graph_node* prev = nullptr; +public: + explicit graph_node(graph& g); + + virtual ~graph_node(); + +protected: + // performs the reset on an individual node. + virtual void reset_node(reset_flags f = rf_reset_protocol) = 0; +}; // class graph_node + +inline void activate_graph(graph& g) { + g.my_is_active = true; +} + +inline void deactivate_graph(graph& g) { + g.my_is_active = false; +} + +inline bool is_graph_active(graph& g) { + return g.my_is_active; +} + +inline graph_task* prioritize_task(graph& g, graph_task& gt) { + if( no_priority == gt.priority ) + return &gt; + + //! Non-preemptive priority pattern. The original task is submitted as a work item to the + //! priority queue, and a new critical task is created to take and execute a work item with + //! the highest known priority. The reference counting responsibility is transferred (via + //! allocate_continuation) to the new task. + task* critical_task = gt.my_allocator.new_object<priority_task_selector>(g.my_priority_queue, gt.my_allocator); + __TBB_ASSERT( critical_task, "bad_alloc?" ); + g.my_priority_queue.push(&gt); + using tbb::detail::d1::submit; + submit( *critical_task, *g.my_task_arena, *g.my_context, /*as_critical=*/true ); + return nullptr; +} + +//! 
Spawns a task inside graph arena +inline void spawn_in_graph_arena(graph& g, graph_task& arena_task) { + if (is_graph_active(g)) { + task* gt = prioritize_task(g, arena_task); + if( !gt ) + return; + + __TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), NULL); + submit( *gt, *g.my_task_arena, *g.my_context +#if __TBB_PREVIEW_CRITICAL_TASKS + , /*as_critical=*/false +#endif + ); + } +} + +// TODO revamp: unify *_in_graph_arena functions + +//! Enqueues a task inside graph arena +inline void enqueue_in_graph_arena(graph &g, graph_task& arena_task) { + if (is_graph_active(g)) { + __TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" ); + + // TODO revamp: decide on the approach that does not postpone critical task + if( task* gt = prioritize_task(g, arena_task) ) + submit( *gt, *g.my_task_arena, *g.my_context, /*as_critical=*/false); + } +} + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_flow_graph_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h index f4f55a6c7a..d73fe86bfc 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h @@ -1,351 +1,351 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_indexer_impl_H -#define __TBB__flow_graph_indexer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::detail::d1 - -#include "_flow_graph_types_impl.h" - - // Output of the indexer_node is a tbb::flow::tagged_msg, and will be of - // the form tagged_msg<tag, result> - // where the value of tag will indicate which result was put to the - // successor. 
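// Illustrative example (not part of this header): consuming the tagged_msg produced by an
// indexer_node through the public tbb::flow API. The graph, node, and function names below
// (g, idx, sink, consume_indexer_output) are placeholders chosen for this sketch; cast_to and
// tag() are the documented accessors for tagged_msg.
#include <iostream>
#include "oneapi/tbb/flow_graph.h"

void consume_indexer_output() {
    using namespace tbb::flow;
    graph g;
    indexer_node<int, float> idx(g);    // output_type is tagged_msg<size_t, int, float>
    function_node<indexer_node<int, float>::output_type> sink(g, unlimited,
        [](const indexer_node<int, float>::output_type& msg) {
            if (msg.tag() == 0)         // the tag is the index of the input port that fired
                std::cout << "int: " << cast_to<int>(msg) << '\n';
            else
                std::cout << "float: " << cast_to<float>(msg) << '\n';
        });
    make_edge(idx, sink);
    input_port<0>(idx).try_put(42);     // arrives at sink tagged with 0
    input_port<1>(idx).try_put(3.5f);   // arrives at sink tagged with 1
    g.wait_for_all();
}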
- - template<typename IndexerNodeBaseType, typename T, size_t K> - graph_task* do_try_put(const T &v, void *p) { - typename IndexerNodeBaseType::output_type o(K, v); - return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o); - } - - template<typename TupleTypes,int N> - struct indexer_helper { - template<typename IndexerNodeBaseType, typename PortTuple> - static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { - typedef typename std::tuple_element<N-1, TupleTypes>::type T; - graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>; - std::get<N-1>(my_input).set_up(p, indexer_node_put_task, g); - indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p, g); - } - }; - - template<typename TupleTypes> - struct indexer_helper<TupleTypes,1> { - template<typename IndexerNodeBaseType, typename PortTuple> - static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { - typedef typename std::tuple_element<0, TupleTypes>::type T; - graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>; - std::get<0>(my_input).set_up(p, indexer_node_put_task, g); - } - }; - - template<typename T> - class indexer_input_port : public receiver<T> { - private: - void* my_indexer_ptr; - typedef graph_task* (* forward_function_ptr)(T const &, void* ); - forward_function_ptr my_try_put_task; - graph* my_graph; - public: - void set_up(void* p, forward_function_ptr f, graph& g) { - my_indexer_ptr = p; - my_try_put_task = f; - my_graph = &g; - } - - protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task(const T &v) override { - return my_try_put_task(v, my_indexer_ptr); - } - - graph& graph_reference() const override { - return *my_graph; - } - }; - - template<typename InputTuple, typename OutputType, typename StructTypes> - class indexer_node_FE { - public: - static const int N = std::tuple_size<InputTuple>::value; - typedef OutputType output_type; - typedef InputTuple input_type; - - // Some versions of Intel(R) C++ Compiler fail to generate an implicit constructor for the class which has std::tuple as a member. - indexer_node_FE() : my_inputs() {} - - input_type &input_ports() { return my_inputs; } - protected: - input_type my_inputs; - }; - - //! 
indexer_node_base - template<typename InputTuple, typename OutputType, typename StructTypes> - class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>, - public sender<OutputType> { - protected: - using graph_node::my_graph; - public: - static const size_t N = std::tuple_size<InputTuple>::value; - typedef OutputType output_type; - typedef StructTypes tuple_types; - typedef typename sender<output_type>::successor_type successor_type; - typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type; - - private: - // ----------- Aggregator ------------ - enum op_type { reg_succ, rem_succ, try__put_task - }; - typedef indexer_node_base<InputTuple,output_type,StructTypes> class_type; - - class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> { - public: - char type; - union { - output_type const *my_arg; - successor_type *my_succ; - graph_task* bypass_t; - }; - indexer_node_base_operation(const output_type* e, op_type t) : - type(char(t)), my_arg(e) {} - indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), - my_succ(const_cast<successor_type *>(&s)) {} - }; - - typedef aggregating_functor<class_type, indexer_node_base_operation> handler_type; - friend class aggregating_functor<class_type, indexer_node_base_operation>; - aggregator<handler_type, indexer_node_base_operation> my_aggregator; - - void handle_operations(indexer_node_base_operation* op_list) { - indexer_node_base_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - - case reg_succ: - my_successors.register_successor(*(current->my_succ)); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - - case rem_succ: - my_successors.remove_successor(*(current->my_succ)); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case try__put_task: { - current->bypass_t = my_successors.try_put_task(*(current->my_arg)); - current->status.store( SUCCEEDED, std::memory_order_release); // return of try_put_task actual return value - } - break; - } - } - } - // ---------- end aggregator ----------- - public: - indexer_node_base(graph& g) : graph_node(g), input_ports_type(), my_successors(this) { - indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, g); - my_aggregator.initialize_handler(handler_type(this)); - } - - indexer_node_base(const indexer_node_base& other) - : graph_node(other.my_graph), input_ports_type(), sender<output_type>(), my_successors(this) - { - indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, other.my_graph); - my_aggregator.initialize_handler(handler_type(this)); - } - - bool register_successor(successor_type &r) override { - indexer_node_base_operation op_data(r, reg_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - bool remove_successor( successor_type &r) override { - indexer_node_base_operation op_data(r, rem_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - graph_task* try_put_task(output_type const *v) { // not a virtual method in this class - indexer_node_base_operation op_data(v, try__put_task); - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - - protected: - void reset_node(reset_flags f) override { - if(f & rf_clear_edges) { - my_successors.clear(); - } - } - - private: - broadcast_cache<output_type, null_rw_mutex> my_successors; - }; 
//indexer_node_base - - - template<int N, typename InputTuple> struct input_types; - - template<typename InputTuple> - struct input_types<1, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef tagged_msg<size_t, first_type > type; - }; - - template<typename InputTuple> - struct input_types<2, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef tagged_msg<size_t, first_type, second_type> type; - }; - - template<typename InputTuple> - struct input_types<3, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef tagged_msg<size_t, first_type, second_type, third_type> type; - }; - - template<typename InputTuple> - struct input_types<4, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type> type; - }; - - template<typename InputTuple> - struct input_types<5, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type> type; - }; - - template<typename InputTuple> - struct input_types<6, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type> type; - }; - - template<typename InputTuple> - struct input_types<7, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef typename std::tuple_element<6, InputTuple>::type seventh_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type, - seventh_type> type; - }; - - - template<typename InputTuple> - struct input_types<8, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, 
InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef typename std::tuple_element<6, InputTuple>::type seventh_type; - typedef typename std::tuple_element<7, InputTuple>::type eighth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type, - seventh_type, eighth_type> type; - }; - - - template<typename InputTuple> - struct input_types<9, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef typename std::tuple_element<6, InputTuple>::type seventh_type; - typedef typename std::tuple_element<7, InputTuple>::type eighth_type; - typedef typename std::tuple_element<8, InputTuple>::type nineth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type, - seventh_type, eighth_type, nineth_type> type; - }; - - template<typename InputTuple> - struct input_types<10, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef typename std::tuple_element<6, InputTuple>::type seventh_type; - typedef typename std::tuple_element<7, InputTuple>::type eighth_type; - typedef typename std::tuple_element<8, InputTuple>::type nineth_type; - typedef typename std::tuple_element<9, InputTuple>::type tenth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type, - seventh_type, eighth_type, nineth_type, - tenth_type> type; - }; - - // type generators - template<typename OutputTuple> - struct indexer_types : public input_types<std::tuple_size<OutputTuple>::value, OutputTuple> { - static const int N = std::tuple_size<OutputTuple>::value; - typedef typename input_types<N, OutputTuple>::type output_type; - typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type; - typedef indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type; - typedef indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type; - }; - - template<class OutputTuple> - class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type { - public: - typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type; - typedef OutputTuple tuple_types; - typedef typename indexer_types<OutputTuple>::output_type output_type; - private: - typedef typename indexer_types<OutputTuple>::indexer_base_type base_type; - public: - unfolded_indexer_node(graph& g) : base_type(g) {} - unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {} - }; - -#endif /* __TBB__flow_graph_indexer_impl_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 
2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_indexer_impl_H +#define __TBB__flow_graph_indexer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 + +#include "_flow_graph_types_impl.h" + + // Output of the indexer_node is a tbb::flow::tagged_msg, and will be of + // the form tagged_msg<tag, result> + // where the value of tag will indicate which result was put to the + // successor. + + template<typename IndexerNodeBaseType, typename T, size_t K> + graph_task* do_try_put(const T &v, void *p) { + typename IndexerNodeBaseType::output_type o(K, v); + return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o); + } + + template<typename TupleTypes,int N> + struct indexer_helper { + template<typename IndexerNodeBaseType, typename PortTuple> + static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { + typedef typename std::tuple_element<N-1, TupleTypes>::type T; + graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>; + std::get<N-1>(my_input).set_up(p, indexer_node_put_task, g); + indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p, g); + } + }; + + template<typename TupleTypes> + struct indexer_helper<TupleTypes,1> { + template<typename IndexerNodeBaseType, typename PortTuple> + static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { + typedef typename std::tuple_element<0, TupleTypes>::type T; + graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>; + std::get<0>(my_input).set_up(p, indexer_node_put_task, g); + } + }; + + template<typename T> + class indexer_input_port : public receiver<T> { + private: + void* my_indexer_ptr; + typedef graph_task* (* forward_function_ptr)(T const &, void* ); + forward_function_ptr my_try_put_task; + graph* my_graph; + public: + void set_up(void* p, forward_function_ptr f, graph& g) { + my_indexer_ptr = p; + my_try_put_task = f; + my_graph = &g; + } + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const T &v) override { + return my_try_put_task(v, my_indexer_ptr); + } + + graph& graph_reference() const override { + return *my_graph; + } + }; + + template<typename InputTuple, typename OutputType, typename StructTypes> + class indexer_node_FE { + public: + static const int N = std::tuple_size<InputTuple>::value; + typedef OutputType output_type; + typedef InputTuple input_type; + + // Some versions of Intel(R) C++ Compiler fail to generate an implicit constructor for the class which has std::tuple as a member. 
+ indexer_node_FE() : my_inputs() {} + + input_type &input_ports() { return my_inputs; } + protected: + input_type my_inputs; + }; + + //! indexer_node_base + template<typename InputTuple, typename OutputType, typename StructTypes> + class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>, + public sender<OutputType> { + protected: + using graph_node::my_graph; + public: + static const size_t N = std::tuple_size<InputTuple>::value; + typedef OutputType output_type; + typedef StructTypes tuple_types; + typedef typename sender<output_type>::successor_type successor_type; + typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_succ, rem_succ, try__put_task + }; + typedef indexer_node_base<InputTuple,output_type,StructTypes> class_type; + + class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> { + public: + char type; + union { + output_type const *my_arg; + successor_type *my_succ; + graph_task* bypass_t; + }; + indexer_node_base_operation(const output_type* e, op_type t) : + type(char(t)), my_arg(e) {} + indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), + my_succ(const_cast<successor_type *>(&s)) {} + }; + + typedef aggregating_functor<class_type, indexer_node_base_operation> handler_type; + friend class aggregating_functor<class_type, indexer_node_base_operation>; + aggregator<handler_type, indexer_node_base_operation> my_aggregator; + + void handle_operations(indexer_node_base_operation* op_list) { + indexer_node_base_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + + case reg_succ: + my_successors.register_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + + case rem_succ: + my_successors.remove_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case try__put_task: { + current->bypass_t = my_successors.try_put_task(*(current->my_arg)); + current->status.store( SUCCEEDED, std::memory_order_release); // return of try_put_task actual return value + } + break; + } + } + } + // ---------- end aggregator ----------- + public: + indexer_node_base(graph& g) : graph_node(g), input_ports_type(), my_successors(this) { + indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, g); + my_aggregator.initialize_handler(handler_type(this)); + } + + indexer_node_base(const indexer_node_base& other) + : graph_node(other.my_graph), input_ports_type(), sender<output_type>(), my_successors(this) + { + indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, other.my_graph); + my_aggregator.initialize_handler(handler_type(this)); + } + + bool register_successor(successor_type &r) override { + indexer_node_base_operation op_data(r, reg_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool remove_successor( successor_type &r) override { + indexer_node_base_operation op_data(r, rem_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + graph_task* try_put_task(output_type const *v) { // not a virtual method in this class + indexer_node_base_operation op_data(v, try__put_task); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + protected: + void reset_node(reset_flags f) override { + if(f & 
rf_clear_edges) { + my_successors.clear(); + } + } + + private: + broadcast_cache<output_type, null_rw_mutex> my_successors; + }; //indexer_node_base + + + template<int N, typename InputTuple> struct input_types; + + template<typename InputTuple> + struct input_types<1, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef tagged_msg<size_t, first_type > type; + }; + + template<typename InputTuple> + struct input_types<2, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef tagged_msg<size_t, first_type, second_type> type; + }; + + template<typename InputTuple> + struct input_types<3, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef tagged_msg<size_t, first_type, second_type, third_type> type; + }; + + template<typename InputTuple> + struct input_types<4, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type> type; + }; + + template<typename InputTuple> + struct input_types<5, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type> type; + }; + + template<typename InputTuple> + struct input_types<6, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type> type; + }; + + template<typename InputTuple> + struct input_types<7, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type> type; + }; + + + template<typename InputTuple> + struct input_types<8, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, 
InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type> type; + }; + + + template<typename InputTuple> + struct input_types<9, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef typename std::tuple_element<8, InputTuple>::type nineth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type, nineth_type> type; + }; + + template<typename InputTuple> + struct input_types<10, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef typename std::tuple_element<8, InputTuple>::type nineth_type; + typedef typename std::tuple_element<9, InputTuple>::type tenth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type, nineth_type, + tenth_type> type; + }; + + // type generators + template<typename OutputTuple> + struct indexer_types : public input_types<std::tuple_size<OutputTuple>::value, OutputTuple> { + static const int N = std::tuple_size<OutputTuple>::value; + typedef typename input_types<N, OutputTuple>::type output_type; + typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type; + typedef indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type; + typedef indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type; + }; + + template<class OutputTuple> + class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type { + public: + typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type; + typedef OutputTuple tuple_types; + typedef typename indexer_types<OutputTuple>::output_type output_type; + private: + typedef typename indexer_types<OutputTuple>::indexer_base_type base_type; + public: + unfolded_indexer_node(graph& g) : base_type(g) {} + unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {} + }; 
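The indexer front end above tags each incoming value with the index of the port that received it, so a single successor can accept heterogeneous messages and dispatch on the tag. A minimal usage sketch through the public tbb::flow interface follows; the graph topology and the printed strings are illustrative and not part of this header.

#include <iostream>
#include "oneapi/tbb/flow_graph.h"

int main() {
    using namespace tbb::flow;
    typedef indexer_node<int, float> indexer_t;

    graph g;
    indexer_t idx(g);
    // The successor sees tagged_msg<size_t, int, float>; tag() is the port index.
    function_node<indexer_t::output_type> sink(g, unlimited,
        [](const indexer_t::output_type& msg) {
            if (msg.tag() == 0)
                std::cout << "int: "   << cast_to<int>(msg)   << '\n';
            else
                std::cout << "float: " << cast_to<float>(msg) << '\n';
        });

    make_edge(idx, sink);
    input_port<0>(idx).try_put(42);      // arrives at the successor with tag 0
    input_port<1>(idx).try_put(3.5f);    // arrives at the successor with tag 1
    g.wait_for_all();
    return 0;
}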
+ +#endif /* __TBB__flow_graph_indexer_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h index 4466bf4180..84ec74c7ae 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h @@ -1,279 +1,279 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_item_buffer_impl_H -#define __TBB__flow_graph_item_buffer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "_aligned_space.h" - -// in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h) - -//! Expandable buffer of items. The possible operations are push, pop, -//* tests for empty and so forth. No mutual exclusion is built in. -//* objects are constructed into and explicitly-destroyed. get_my_item gives -// a read-only reference to the item in the buffer. set_my_item may be called -// with either an empty or occupied slot. - -template <typename T, typename A=cache_aligned_allocator<T> > -class item_buffer { -public: - typedef T item_type; - enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 }; -protected: - typedef size_t size_type; - typedef std::pair<item_type, buffer_item_state> aligned_space_item; - typedef aligned_space<aligned_space_item> buffer_item_type; - typedef typename allocator_traits<A>::template rebind_alloc<buffer_item_type> allocator_type; - buffer_item_type *my_array; - size_type my_array_size; - static const size_type initial_buffer_size = 4; - size_type my_head; - size_type my_tail; - - bool buffer_empty() const { return my_head == my_tail; } - - aligned_space_item &item(size_type i) { - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value),NULL); - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); - return *my_array[i & (my_array_size - 1) ].begin(); - } - - const aligned_space_item &item(size_type i) const { - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), NULL); - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); - return *my_array[i & (my_array_size-1)].begin(); - } - - bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } -#if TBB_USE_ASSERT - bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } -#endif - - // object management in buffer - const item_type &get_my_item(size_t i) const { - __TBB_ASSERT(my_item_valid(i),"attempt to get invalid item"); - item_type* itm = const_cast<item_type*>(reinterpret_cast<const item_type*>(&item(i).first)); - return *itm; - } - - // may 
be called with an empty slot or a slot that has already been constructed into. - void set_my_item(size_t i, const item_type &o) { - if(item(i).second != no_item) { - destroy_item(i); - } - new(&(item(i).first)) item_type(o); - item(i).second = has_item; - } - - // destructively-fetch an object from the buffer - void fetch_item(size_t i, item_type &o) { - __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); - o = get_my_item(i); // could have std::move assign semantics - destroy_item(i); - } - - // move an existing item from one slot to another. The moved-to slot must be unoccupied, - // the moved-from slot must exist and not be reserved. The after, from will be empty, - // to will be occupied but not reserved - void move_item(size_t to, size_t from) { - __TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot"); - __TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot"); - set_my_item(to, get_my_item(from)); // could have std::move semantics - destroy_item(from); - - } - - // put an item in an empty slot. Return true if successful, else false - bool place_item(size_t here, const item_type &me) { -#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES - if(my_item_valid(here)) return false; -#endif - set_my_item(here, me); - return true; - } - - // could be implemented with std::move semantics - void swap_items(size_t i, size_t j) { - __TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)"); - item_type temp = get_my_item(i); - set_my_item(i, get_my_item(j)); - set_my_item(j, temp); - } - - void destroy_item(size_type i) { - __TBB_ASSERT(my_item_valid(i), "destruction of invalid item"); - item(i).first.~item_type(); - item(i).second = no_item; - } - - // returns the front element - const item_type& front() const - { - __TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item"); - return get_my_item(my_head); - } - - // returns the back element - const item_type& back() const - { - __TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch head non-item"); - return get_my_item(my_tail - 1); - } - - // following methods are for reservation of the front of a buffer. - void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; } - void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; } - - void destroy_front() { destroy_item(my_head); ++my_head; } - void destroy_back() { destroy_item(my_tail-1); --my_tail; } - - // we have to be able to test against a new tail value without changing my_tail - // grow_array doesn't work if we change my_tail when the old array is too small - size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; } - size_type capacity() { return my_array_size; } - // sequencer_node does not use this method, so we don't - // need a version that passes in the new_tail value. - bool buffer_full() { return size() >= capacity(); } - - //! Grows the internal array. - void grow_my_array( size_t minimum_size ) { - // test that we haven't made the structure inconsistent. - __TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity"); - size_type new_size = my_array_size ? 
2*my_array_size : initial_buffer_size; - while( new_size<minimum_size ) - new_size*=2; - - buffer_item_type* new_array = allocator_type().allocate(new_size); - - // initialize validity to "no" - for( size_type i=0; i<new_size; ++i ) { new_array[i].begin()->second = no_item; } - - for( size_type i=my_head; i<my_tail; ++i) { - if(my_item_valid(i)) { // sequencer_node may have empty slots - // placement-new copy-construct; could be std::move - char *new_space = (char *)&(new_array[i&(new_size-1)].begin()->first); - (void)new(new_space) item_type(get_my_item(i)); - new_array[i&(new_size-1)].begin()->second = item(i).second; - } - } - - clean_up_buffer(/*reset_pointers*/false); - - my_array = new_array; - my_array_size = new_size; - } - - bool push_back(item_type &v) { - if(buffer_full()) { - grow_my_array(size() + 1); - } - set_my_item(my_tail, v); - ++my_tail; - return true; - } - - bool pop_back(item_type &v) { - if (!my_item_valid(my_tail-1)) { - return false; - } - v = this->back(); - destroy_back(); - return true; - } - - bool pop_front(item_type &v) { - if(!my_item_valid(my_head)) { - return false; - } - v = this->front(); - destroy_front(); - return true; - } - - // This is used both for reset and for grow_my_array. In the case of grow_my_array - // we want to retain the values of the head and tail. - void clean_up_buffer(bool reset_pointers) { - if (my_array) { - for( size_type i=my_head; i<my_tail; ++i ) { - if(my_item_valid(i)) - destroy_item(i); - } - allocator_type().deallocate(my_array,my_array_size); - } - my_array = NULL; - if(reset_pointers) { - my_head = my_tail = my_array_size = 0; - } - } - -public: - //! Constructor - item_buffer( ) : my_array(NULL), my_array_size(0), - my_head(0), my_tail(0) { - grow_my_array(initial_buffer_size); - } - - ~item_buffer() { - clean_up_buffer(/*reset_pointers*/true); - } - - void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); } - -}; - -//! item_buffer with reservable front-end. NOTE: if reserving, do not -//* complete operation with pop_front(); use consume_front(). -//* No synchronization built-in. -template<typename T, typename A=cache_aligned_allocator<T> > -class reservable_item_buffer : public item_buffer<T, A> { -protected: - using item_buffer<T, A>::my_item_valid; - using item_buffer<T, A>::my_head; - -public: - reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {} - void reset() {my_reserved = false; item_buffer<T,A>::reset(); } -protected: - - bool reserve_front(T &v) { - if(my_reserved || !my_item_valid(this->my_head)) return false; - my_reserved = true; - // reserving the head - v = this->front(); - this->reserve_item(this->my_head); - return true; - } - - void consume_front() { - __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item"); - this->destroy_front(); - my_reserved = false; - } - - void release_front() { - __TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item"); - this->release_item(this->my_head); - my_reserved = false; - } - - bool my_reserved; -}; - -#endif // __TBB__flow_graph_item_buffer_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_item_buffer_impl_H +#define __TBB__flow_graph_item_buffer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_aligned_space.h" + +// in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h) + +//! Expandable buffer of items. The possible operations are push, pop, +//* tests for empty and so forth. No mutual exclusion is built in. +//* objects are constructed into and explicitly-destroyed. get_my_item gives +// a read-only reference to the item in the buffer. set_my_item may be called +// with either an empty or occupied slot. + +template <typename T, typename A=cache_aligned_allocator<T> > +class item_buffer { +public: + typedef T item_type; + enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 }; +protected: + typedef size_t size_type; + typedef std::pair<item_type, buffer_item_state> aligned_space_item; + typedef aligned_space<aligned_space_item> buffer_item_type; + typedef typename allocator_traits<A>::template rebind_alloc<buffer_item_type> allocator_type; + buffer_item_type *my_array; + size_type my_array_size; + static const size_type initial_buffer_size = 4; + size_type my_head; + size_type my_tail; + + bool buffer_empty() const { return my_head == my_tail; } + + aligned_space_item &item(size_type i) { + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value),NULL); + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); + return *my_array[i & (my_array_size - 1) ].begin(); + } + + const aligned_space_item &item(size_type i) const { + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), NULL); + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); + return *my_array[i & (my_array_size-1)].begin(); + } + + bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } +#if TBB_USE_ASSERT + bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } +#endif + + // object management in buffer + const item_type &get_my_item(size_t i) const { + __TBB_ASSERT(my_item_valid(i),"attempt to get invalid item"); + item_type* itm = const_cast<item_type*>(reinterpret_cast<const item_type*>(&item(i).first)); + return *itm; + } + + // may be called with an empty slot or a slot that has already been constructed into. + void set_my_item(size_t i, const item_type &o) { + if(item(i).second != no_item) { + destroy_item(i); + } + new(&(item(i).first)) item_type(o); + item(i).second = has_item; + } + + // destructively-fetch an object from the buffer + void fetch_item(size_t i, item_type &o) { + __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); + o = get_my_item(i); // could have std::move assign semantics + destroy_item(i); + } + + // move an existing item from one slot to another. 
The moved-to slot must be unoccupied, + // the moved-from slot must exist and not be reserved. The after, from will be empty, + // to will be occupied but not reserved + void move_item(size_t to, size_t from) { + __TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot"); + __TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot"); + set_my_item(to, get_my_item(from)); // could have std::move semantics + destroy_item(from); + + } + + // put an item in an empty slot. Return true if successful, else false + bool place_item(size_t here, const item_type &me) { +#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES + if(my_item_valid(here)) return false; +#endif + set_my_item(here, me); + return true; + } + + // could be implemented with std::move semantics + void swap_items(size_t i, size_t j) { + __TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)"); + item_type temp = get_my_item(i); + set_my_item(i, get_my_item(j)); + set_my_item(j, temp); + } + + void destroy_item(size_type i) { + __TBB_ASSERT(my_item_valid(i), "destruction of invalid item"); + item(i).first.~item_type(); + item(i).second = no_item; + } + + // returns the front element + const item_type& front() const + { + __TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item"); + return get_my_item(my_head); + } + + // returns the back element + const item_type& back() const + { + __TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch head non-item"); + return get_my_item(my_tail - 1); + } + + // following methods are for reservation of the front of a buffer. + void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; } + void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; } + + void destroy_front() { destroy_item(my_head); ++my_head; } + void destroy_back() { destroy_item(my_tail-1); --my_tail; } + + // we have to be able to test against a new tail value without changing my_tail + // grow_array doesn't work if we change my_tail when the old array is too small + size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; } + size_type capacity() { return my_array_size; } + // sequencer_node does not use this method, so we don't + // need a version that passes in the new_tail value. + bool buffer_full() { return size() >= capacity(); } + + //! Grows the internal array. + void grow_my_array( size_t minimum_size ) { + // test that we haven't made the structure inconsistent. + __TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity"); + size_type new_size = my_array_size ? 
2*my_array_size : initial_buffer_size; + while( new_size<minimum_size ) + new_size*=2; + + buffer_item_type* new_array = allocator_type().allocate(new_size); + + // initialize validity to "no" + for( size_type i=0; i<new_size; ++i ) { new_array[i].begin()->second = no_item; } + + for( size_type i=my_head; i<my_tail; ++i) { + if(my_item_valid(i)) { // sequencer_node may have empty slots + // placement-new copy-construct; could be std::move + char *new_space = (char *)&(new_array[i&(new_size-1)].begin()->first); + (void)new(new_space) item_type(get_my_item(i)); + new_array[i&(new_size-1)].begin()->second = item(i).second; + } + } + + clean_up_buffer(/*reset_pointers*/false); + + my_array = new_array; + my_array_size = new_size; + } + + bool push_back(item_type &v) { + if(buffer_full()) { + grow_my_array(size() + 1); + } + set_my_item(my_tail, v); + ++my_tail; + return true; + } + + bool pop_back(item_type &v) { + if (!my_item_valid(my_tail-1)) { + return false; + } + v = this->back(); + destroy_back(); + return true; + } + + bool pop_front(item_type &v) { + if(!my_item_valid(my_head)) { + return false; + } + v = this->front(); + destroy_front(); + return true; + } + + // This is used both for reset and for grow_my_array. In the case of grow_my_array + // we want to retain the values of the head and tail. + void clean_up_buffer(bool reset_pointers) { + if (my_array) { + for( size_type i=my_head; i<my_tail; ++i ) { + if(my_item_valid(i)) + destroy_item(i); + } + allocator_type().deallocate(my_array,my_array_size); + } + my_array = NULL; + if(reset_pointers) { + my_head = my_tail = my_array_size = 0; + } + } + +public: + //! Constructor + item_buffer( ) : my_array(NULL), my_array_size(0), + my_head(0), my_tail(0) { + grow_my_array(initial_buffer_size); + } + + ~item_buffer() { + clean_up_buffer(/*reset_pointers*/true); + } + + void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); } + +}; + +//! item_buffer with reservable front-end. NOTE: if reserving, do not +//* complete operation with pop_front(); use consume_front(). +//* No synchronization built-in. 
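The buffer above never shrinks its index space: my_head and my_tail grow monotonically, and grow_my_array keeps my_array_size a power of two so that item(i) can map an index to a slot with a mask instead of a modulo. A small standalone sketch of that invariant (the names below are invented for the example, not taken from the header):

#include <cassert>
#include <cstddef>

// Maps a monotonically growing index to a slot of a power-of-two-sized ring.
static std::size_t slot(std::size_t index, std::size_t capacity_pow2) {
    return index & (capacity_pow2 - 1);   // equivalent to index % capacity_pow2
}

int main() {
    std::size_t capacity = 4;             // matches initial_buffer_size above
    while (capacity < 10) capacity *= 2;  // same doubling policy as grow_my_array
    for (std::size_t i = 0; i < 100; ++i)
        assert(slot(i, capacity) == i % capacity);
    return 0;
}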
+template<typename T, typename A=cache_aligned_allocator<T> > +class reservable_item_buffer : public item_buffer<T, A> { +protected: + using item_buffer<T, A>::my_item_valid; + using item_buffer<T, A>::my_head; + +public: + reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {} + void reset() {my_reserved = false; item_buffer<T,A>::reset(); } +protected: + + bool reserve_front(T &v) { + if(my_reserved || !my_item_valid(this->my_head)) return false; + my_reserved = true; + // reserving the head + v = this->front(); + this->reserve_item(this->my_head); + return true; + } + + void consume_front() { + __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item"); + this->destroy_front(); + my_reserved = false; + } + + void release_front() { + __TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item"); + this->release_item(this->my_head); + my_reserved = false; + } + + bool my_reserved; +}; + +#endif // __TBB__flow_graph_item_buffer_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h index 98b357cdbc..c8316edd56 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h @@ -1,1706 +1,1706 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_join_impl_H -#define __TBB__flow_graph_join_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included into namespace tbb::detail::d1 - - struct forwarding_base : no_assign { - forwarding_base(graph &g) : graph_ref(g) {} - virtual ~forwarding_base() {} - graph& graph_ref; - }; - - struct queueing_forwarding_base : forwarding_base { - using forwarding_base::forwarding_base; - // decrement_port_count may create a forwarding task. If we cannot handle the task - // ourselves, ask decrement_port_count to deal with it. - virtual graph_task* decrement_port_count(bool handle_task) = 0; - }; - - struct reserving_forwarding_base : forwarding_base { - using forwarding_base::forwarding_base; - // decrement_port_count may create a forwarding task. If we cannot handle the task - // ourselves, ask decrement_port_count to deal with it. - virtual graph_task* decrement_port_count() = 0; - virtual void increment_port_count() = 0; - }; - - // specialization that lets us keep a copy of the current_key for building results. - // KeyType can be a reference type. 
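reservable_item_buffer above, and the reserving join ports that follow, share a two-phase protocol: reserve an item from every input, then either consume all reservations on success or release the partial set on failure so the items remain available for a later attempt. A self-contained sketch of that pattern under simplified assumptions (toy_port and try_join are invented for illustration; they are not TBB types):

#include <array>
#include <cstddef>
#include <optional>

struct toy_port {
    std::optional<int> item;
    bool reserved = false;

    bool reserve(int& v) {                  // phase one: peek at the item and lock it
        if (reserved || !item) return false;
        reserved = true;
        v = *item;
        return true;
    }
    void consume() { item.reset(); reserved = false; }   // commit: item leaves the port
    void release() { reserved = false; }                  // roll back: item stays available
};

template <std::size_t N>
bool try_join(std::array<toy_port, N>& ports, std::array<int, N>& out) {
    std::size_t taken = 0;
    for (; taken < N; ++taken)
        if (!ports[taken].reserve(out[taken])) break;
    if (taken == N) {                        // every input reserved: complete the join
        for (auto& p : ports) p.consume();
        return true;
    }
    for (std::size_t i = 0; i < taken; ++i)  // partial success: undo what was reserved
        ports[i].release();
    return false;
}

int main() {
    std::array<toy_port, 2> ports;
    ports[0].item = 1;                       // second port empty, so the join must roll back
    std::array<int, 2> out{};
    bool joined = try_join(ports, out);      // false; ports[0] is released, not consumed
    return joined ? 1 : 0;
}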
- template<typename KeyType> - struct matching_forwarding_base : public forwarding_base { - typedef typename std::decay<KeyType>::type current_key_type; - matching_forwarding_base(graph &g) : forwarding_base(g) { } - virtual graph_task* increment_key_count(current_key_type const & /*t*/) = 0; - current_key_type current_key; // so ports can refer to FE's desired items - }; - - template< int N > - struct join_helper { - - template< typename TupleType, typename PortType > - static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { - std::get<N-1>( my_input ).set_join_node_pointer(port); - join_helper<N-1>::set_join_node_pointer( my_input, port ); - } - template< typename TupleType > - static inline void consume_reservations( TupleType &my_input ) { - std::get<N-1>( my_input ).consume(); - join_helper<N-1>::consume_reservations( my_input ); - } - - template< typename TupleType > - static inline void release_my_reservation( TupleType &my_input ) { - std::get<N-1>( my_input ).release(); - } - - template <typename TupleType> - static inline void release_reservations( TupleType &my_input) { - join_helper<N-1>::release_reservations(my_input); - release_my_reservation(my_input); - } - - template< typename InputTuple, typename OutputTuple > - static inline bool reserve( InputTuple &my_input, OutputTuple &out) { - if ( !std::get<N-1>( my_input ).reserve( std::get<N-1>( out ) ) ) return false; - if ( !join_helper<N-1>::reserve( my_input, out ) ) { - release_my_reservation( my_input ); - return false; - } - return true; - } - - template<typename InputTuple, typename OutputTuple> - static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { - bool res = std::get<N-1>(my_input).get_item(std::get<N-1>(out) ); // may fail - return join_helper<N-1>::get_my_item(my_input, out) && res; // do get on other inputs before returning - } - - template<typename InputTuple, typename OutputTuple> - static inline bool get_items(InputTuple &my_input, OutputTuple &out) { - return get_my_item(my_input, out); - } - - template<typename InputTuple> - static inline void reset_my_port(InputTuple &my_input) { - join_helper<N-1>::reset_my_port(my_input); - std::get<N-1>(my_input).reset_port(); - } - - template<typename InputTuple> - static inline void reset_ports(InputTuple& my_input) { - reset_my_port(my_input); - } - - template<typename InputTuple, typename KeyFuncTuple> - static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { - std::get<N-1>(my_input).set_my_key_func(std::get<N-1>(my_key_funcs)); - std::get<N-1>(my_key_funcs) = nullptr; - join_helper<N-1>::set_key_functors(my_input, my_key_funcs); - } - - template< typename KeyFuncTuple> - static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { - __TBB_ASSERT( - std::get<N-1>(other_inputs).get_my_key_func(), - "key matching join node should not be instantiated without functors." 
- ); - std::get<N-1>(my_inputs).set_my_key_func(std::get<N-1>(other_inputs).get_my_key_func()->clone()); - join_helper<N-1>::copy_key_functors(my_inputs, other_inputs); - } - - template<typename InputTuple> - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - join_helper<N-1>::reset_inputs(my_input, f); - std::get<N-1>(my_input).reset_receiver(f); - } - }; // join_helper<N> - - template< > - struct join_helper<1> { - - template< typename TupleType, typename PortType > - static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { - std::get<0>( my_input ).set_join_node_pointer(port); - } - - template< typename TupleType > - static inline void consume_reservations( TupleType &my_input ) { - std::get<0>( my_input ).consume(); - } - - template< typename TupleType > - static inline void release_my_reservation( TupleType &my_input ) { - std::get<0>( my_input ).release(); - } - - template<typename TupleType> - static inline void release_reservations( TupleType &my_input) { - release_my_reservation(my_input); - } - - template< typename InputTuple, typename OutputTuple > - static inline bool reserve( InputTuple &my_input, OutputTuple &out) { - return std::get<0>( my_input ).reserve( std::get<0>( out ) ); - } - - template<typename InputTuple, typename OutputTuple> - static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { - return std::get<0>(my_input).get_item(std::get<0>(out)); - } - - template<typename InputTuple, typename OutputTuple> - static inline bool get_items(InputTuple &my_input, OutputTuple &out) { - return get_my_item(my_input, out); - } - - template<typename InputTuple> - static inline void reset_my_port(InputTuple &my_input) { - std::get<0>(my_input).reset_port(); - } - - template<typename InputTuple> - static inline void reset_ports(InputTuple& my_input) { - reset_my_port(my_input); - } - - template<typename InputTuple, typename KeyFuncTuple> - static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { - std::get<0>(my_input).set_my_key_func(std::get<0>(my_key_funcs)); - std::get<0>(my_key_funcs) = nullptr; - } - - template< typename KeyFuncTuple> - static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { - __TBB_ASSERT( - std::get<0>(other_inputs).get_my_key_func(), - "key matching join node should not be instantiated without functors." - ); - std::get<0>(my_inputs).set_my_key_func(std::get<0>(other_inputs).get_my_key_func()->clone()); - } - template<typename InputTuple> - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - std::get<0>(my_input).reset_receiver(f); - } - }; // join_helper<1> - - //! 
The two-phase join port - template< typename T > - class reserving_port : public receiver<T> { - public: - typedef T input_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - - private: - // ----------- Aggregator ------------ - enum op_type { reg_pred, rem_pred, res_item, rel_res, con_res - }; - typedef reserving_port<T> class_type; - - class reserving_port_operation : public aggregated_operation<reserving_port_operation> { - public: - char type; - union { - T *my_arg; - predecessor_type *my_pred; - }; - reserving_port_operation(const T& e, op_type t) : - type(char(t)), my_arg(const_cast<T*>(&e)) {} - reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), - my_pred(const_cast<predecessor_type *>(&s)) {} - reserving_port_operation(op_type t) : type(char(t)) {} - }; - - typedef aggregating_functor<class_type, reserving_port_operation> handler_type; - friend class aggregating_functor<class_type, reserving_port_operation>; - aggregator<handler_type, reserving_port_operation> my_aggregator; - - void handle_operations(reserving_port_operation* op_list) { - reserving_port_operation *current; - bool was_missing_predecessors = false; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case reg_pred: - was_missing_predecessors = my_predecessors.empty(); - my_predecessors.add(*(current->my_pred)); - if ( was_missing_predecessors ) { - (void) my_join->decrement_port_count(); // may try to forward - } - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case rem_pred: - if ( !my_predecessors.empty() ) { - my_predecessors.remove(*(current->my_pred)); - if ( my_predecessors.empty() ) // was the last predecessor - my_join->increment_port_count(); - } - // TODO: consider returning failure if there were no predecessors to remove - current->status.store( SUCCEEDED, std::memory_order_release ); - break; - case res_item: - if ( reserved ) { - current->status.store( FAILED, std::memory_order_release); - } - else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) { - reserved = true; - current->status.store( SUCCEEDED, std::memory_order_release); - } else { - if ( my_predecessors.empty() ) { - my_join->increment_port_count(); - } - current->status.store( FAILED, std::memory_order_release); - } - break; - case rel_res: - reserved = false; - my_predecessors.try_release( ); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case con_res: - reserved = false; - my_predecessors.try_consume( ); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - } - } - } - - protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task( const T & ) override { - return nullptr; - } - - graph& graph_reference() const override { - return my_join->graph_ref; - } - - public: - - //! Constructor - reserving_port() : my_join(nullptr), my_predecessors(this), reserved(false) { - my_aggregator.initialize_handler(handler_type(this)); - } - - // copy constructor - reserving_port(const reserving_port& /* other */) = delete; - - void set_join_node_pointer(reserving_forwarding_base *join) { - my_join = join; - } - - //! 
Add a predecessor - bool register_predecessor( predecessor_type &src ) override { - reserving_port_operation op_data(src, reg_pred); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Remove a predecessor - bool remove_predecessor( predecessor_type &src ) override { - reserving_port_operation op_data(src, rem_pred); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Reserve an item from the port - bool reserve( T &v ) { - reserving_port_operation op_data(v, res_item); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Release the port - void release( ) { - reserving_port_operation op_data(rel_res); - my_aggregator.execute(&op_data); - } - - //! Complete use of the port - void consume( ) { - reserving_port_operation op_data(con_res); - my_aggregator.execute(&op_data); - } - - void reset_receiver( reset_flags f) { - if(f & rf_clear_edges) my_predecessors.clear(); - else - my_predecessors.reset(); - reserved = false; - __TBB_ASSERT(!(f&rf_clear_edges) || my_predecessors.empty(), "port edges not removed"); - } - - private: -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - friend class get_graph_helper; -#endif - - reserving_forwarding_base *my_join; - reservable_predecessor_cache< T, null_mutex > my_predecessors; - bool reserved; - }; // reserving_port - - //! queueing join_port - template<typename T> - class queueing_port : public receiver<T>, public item_buffer<T> { - public: - typedef T input_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef queueing_port<T> class_type; - - // ----------- Aggregator ------------ - private: - enum op_type { get__item, res_port, try__put_task - }; - - class queueing_port_operation : public aggregated_operation<queueing_port_operation> { - public: - char type; - T my_val; - T* my_arg; - graph_task* bypass_t; - // constructor for value parameter - queueing_port_operation(const T& e, op_type t) : - type(char(t)), my_val(e) - , bypass_t(nullptr) - {} - // constructor for pointer parameter - queueing_port_operation(const T* p, op_type t) : - type(char(t)), my_arg(const_cast<T*>(p)) - , bypass_t(nullptr) - {} - // constructor with no parameter - queueing_port_operation(op_type t) : type(char(t)) - , bypass_t(nullptr) - {} - }; - - typedef aggregating_functor<class_type, queueing_port_operation> handler_type; - friend class aggregating_functor<class_type, queueing_port_operation>; - aggregator<handler_type, queueing_port_operation> my_aggregator; - - void handle_operations(queueing_port_operation* op_list) { - queueing_port_operation *current; - bool was_empty; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case try__put_task: { - graph_task* rtask = nullptr; - was_empty = this->buffer_empty(); - this->push_back(current->my_val); - if (was_empty) rtask = my_join->decrement_port_count(false); - else - rtask = SUCCESSFULLY_ENQUEUED; - current->bypass_t = rtask; - current->status.store( SUCCEEDED, std::memory_order_release); - } - break; - case get__item: - if(!this->buffer_empty()) { - *(current->my_arg) = this->front(); - current->status.store( SUCCEEDED, std::memory_order_release); - } - else { - current->status.store( FAILED, std::memory_order_release); - } - break; - case res_port: - __TBB_ASSERT(this->my_item_valid(this->my_head), "No item to reset"); - this->destroy_front(); - if(this->my_item_valid(this->my_head)) { - (void)my_join->decrement_port_count(true); - } - 
current->status.store( SUCCEEDED, std::memory_order_release); - break; - } - } - } - // ------------ End Aggregator --------------- - - protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task(const T &v) override { - queueing_port_operation op_data(v, try__put_task); - my_aggregator.execute(&op_data); - __TBB_ASSERT(op_data.status == SUCCEEDED || !op_data.bypass_t, "inconsistent return from aggregator"); - if(!op_data.bypass_t) return SUCCESSFULLY_ENQUEUED; - return op_data.bypass_t; - } - - graph& graph_reference() const override { - return my_join->graph_ref; - } - - public: - - //! Constructor - queueing_port() : item_buffer<T>() { - my_join = nullptr; - my_aggregator.initialize_handler(handler_type(this)); - } - - //! copy constructor - queueing_port(const queueing_port& /* other */) = delete; - - //! record parent for tallying available items - void set_join_node_pointer(queueing_forwarding_base *join) { - my_join = join; - } - - bool get_item( T &v ) { - queueing_port_operation op_data(&v, get__item); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - // reset_port is called when item is accepted by successor, but - // is initiated by join_node. - void reset_port() { - queueing_port_operation op_data(res_port); - my_aggregator.execute(&op_data); - return; - } - - void reset_receiver(reset_flags) { - item_buffer<T>::reset(); - } - - private: -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - friend class get_graph_helper; -#endif - - queueing_forwarding_base *my_join; - }; // queueing_port - -#include "_flow_graph_tagged_buffer_impl.h" - - template<typename K> - struct count_element { - K my_key; - size_t my_value; - }; - - // method to access the key in the counting table - // the ref has already been removed from K - template< typename K > - struct key_to_count_functor { - typedef count_element<K> table_item_type; - const K& operator()(const table_item_type& v) { return v.my_key; } - }; - - // the ports can have only one template parameter. 
We wrap the types needed in - // a traits type - template< class TraitsType > - class key_matching_port : - public receiver<typename TraitsType::T>, - public hash_buffer< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK, - typename TraitsType::KHash > { - public: - typedef TraitsType traits; - typedef key_matching_port<traits> class_type; - typedef typename TraitsType::T input_type; - typedef typename TraitsType::K key_type; - typedef typename std::decay<key_type>::type noref_key_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename TraitsType::TtoK type_to_key_func_type; - typedef typename TraitsType::KHash hash_compare_type; - typedef hash_buffer< key_type, input_type, type_to_key_func_type, hash_compare_type > buffer_type; - - private: -// ----------- Aggregator ------------ - private: - enum op_type { try__put, get__item, res_port - }; - - class key_matching_port_operation : public aggregated_operation<key_matching_port_operation> { - public: - char type; - input_type my_val; - input_type *my_arg; - // constructor for value parameter - key_matching_port_operation(const input_type& e, op_type t) : - type(char(t)), my_val(e) {} - // constructor for pointer parameter - key_matching_port_operation(const input_type* p, op_type t) : - type(char(t)), my_arg(const_cast<input_type*>(p)) {} - // constructor with no parameter - key_matching_port_operation(op_type t) : type(char(t)) {} - }; - - typedef aggregating_functor<class_type, key_matching_port_operation> handler_type; - friend class aggregating_functor<class_type, key_matching_port_operation>; - aggregator<handler_type, key_matching_port_operation> my_aggregator; - - void handle_operations(key_matching_port_operation* op_list) { - key_matching_port_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case try__put: { - bool was_inserted = this->insert_with_key(current->my_val); - // return failure if a duplicate insertion occurs - current->status.store( was_inserted ? 
SUCCEEDED : FAILED, std::memory_order_release); - } - break; - case get__item: - // use current_key from FE for item - if(!this->find_with_key(my_join->current_key, *(current->my_arg))) { - __TBB_ASSERT(false, "Failed to find item corresponding to current_key."); - } - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case res_port: - // use current_key from FE for item - this->delete_with_key(my_join->current_key); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - } - } - } -// ------------ End Aggregator --------------- - protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task(const input_type& v) override { - key_matching_port_operation op_data(v, try__put); - graph_task* rtask = nullptr; - my_aggregator.execute(&op_data); - if(op_data.status == SUCCEEDED) { - rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn - // rtask has to reflect the return status of the try_put - if(!rtask) rtask = SUCCESSFULLY_ENQUEUED; - } - return rtask; - } - - graph& graph_reference() const override { - return my_join->graph_ref; - } - - public: - - key_matching_port() : receiver<input_type>(), buffer_type() { - my_join = nullptr; - my_aggregator.initialize_handler(handler_type(this)); - } - - // copy constructor - key_matching_port(const key_matching_port& /*other*/) = delete; -#if __INTEL_COMPILER <= 2021 - // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited - // class while the parent class has the virtual keyword for the destrocutor. - virtual -#endif - ~key_matching_port() { } - - void set_join_node_pointer(forwarding_base *join) { - my_join = dynamic_cast<matching_forwarding_base<key_type>*>(join); - } - - void set_my_key_func(type_to_key_func_type *f) { this->set_key_func(f); } - - type_to_key_func_type* get_my_key_func() { return this->get_key_func(); } - - bool get_item( input_type &v ) { - // aggregator uses current_key from FE for Key - key_matching_port_operation op_data(&v, get__item); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - // reset_port is called when item is accepted by successor, but - // is initiated by join_node. - void reset_port() { - key_matching_port_operation op_data(res_port); - my_aggregator.execute(&op_data); - return; - } - - void reset_receiver(reset_flags ) { - buffer_type::reset(); - } - - private: - // my_join forwarding base used to count number of inputs that - // received key. - matching_forwarding_base<key_type> *my_join; - }; // key_matching_port - - using namespace graph_policy_namespace; - - template<typename JP, typename InputTuple, typename OutputTuple> - class join_node_base; - - //! 
join_node_FE : implements input port policy - template<typename JP, typename InputTuple, typename OutputTuple> - class join_node_FE; - - template<typename InputTuple, typename OutputTuple> - class join_node_FE<reserving, InputTuple, OutputTuple> : public reserving_forwarding_base { - public: - static const int N = std::tuple_size<OutputTuple>::value; - typedef OutputTuple output_type; - typedef InputTuple input_type; - typedef join_node_base<reserving, InputTuple, OutputTuple> base_node_type; // for forwarding - - join_node_FE(graph &g) : reserving_forwarding_base(g), my_node(nullptr) { - ports_with_no_inputs = N; - join_helper<N>::set_join_node_pointer(my_inputs, this); - } - - join_node_FE(const join_node_FE& other) : reserving_forwarding_base((other.reserving_forwarding_base::graph_ref)), my_node(nullptr) { - ports_with_no_inputs = N; - join_helper<N>::set_join_node_pointer(my_inputs, this); - } - - void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } - - void increment_port_count() override { - ++ports_with_no_inputs; - } - - // if all input_ports have predecessors, spawn forward to try and consume tuples - graph_task* decrement_port_count() override { - if(ports_with_no_inputs.fetch_sub(1) == 1) { - if(is_graph_active(this->graph_ref)) { - small_object_allocator allocator{}; - typedef forward_task_bypass<base_node_type> task_type; - graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); - graph_ref.reserve_wait(); - spawn_in_graph_arena(this->graph_ref, *t); - } - } - return nullptr; - } - - input_type &input_ports() { return my_inputs; } - - protected: - - void reset( reset_flags f) { - // called outside of parallel contexts - ports_with_no_inputs = N; - join_helper<N>::reset_inputs(my_inputs, f); - } - - // all methods on input ports should be called under mutual exclusion from join_node_base. - - bool tuple_build_may_succeed() { - return !ports_with_no_inputs; - } - - bool try_to_make_tuple(output_type &out) { - if(ports_with_no_inputs) return false; - return join_helper<N>::reserve(my_inputs, out); - } - - void tuple_accepted() { - join_helper<N>::consume_reservations(my_inputs); - } - void tuple_rejected() { - join_helper<N>::release_reservations(my_inputs); - } - - input_type my_inputs; - base_node_type *my_node; - std::atomic<std::size_t> ports_with_no_inputs; - }; // join_node_FE<reserving, ... 
> - - template<typename InputTuple, typename OutputTuple> - class join_node_FE<queueing, InputTuple, OutputTuple> : public queueing_forwarding_base { - public: - static const int N = std::tuple_size<OutputTuple>::value; - typedef OutputTuple output_type; - typedef InputTuple input_type; - typedef join_node_base<queueing, InputTuple, OutputTuple> base_node_type; // for forwarding - - join_node_FE(graph &g) : queueing_forwarding_base(g), my_node(nullptr) { - ports_with_no_items = N; - join_helper<N>::set_join_node_pointer(my_inputs, this); - } - - join_node_FE(const join_node_FE& other) : queueing_forwarding_base((other.queueing_forwarding_base::graph_ref)), my_node(nullptr) { - ports_with_no_items = N; - join_helper<N>::set_join_node_pointer(my_inputs, this); - } - - // needed for forwarding - void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } - - void reset_port_count() { - ports_with_no_items = N; - } - - // if all input_ports have items, spawn forward to try and consume tuples - graph_task* decrement_port_count(bool handle_task) override - { - if(ports_with_no_items.fetch_sub(1) == 1) { - if(is_graph_active(this->graph_ref)) { - small_object_allocator allocator{}; - typedef forward_task_bypass<base_node_type> task_type; - graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); - graph_ref.reserve_wait(); - if( !handle_task ) - return t; - spawn_in_graph_arena(this->graph_ref, *t); - } - } - return nullptr; - } - - input_type &input_ports() { return my_inputs; } - - protected: - - void reset( reset_flags f) { - reset_port_count(); - join_helper<N>::reset_inputs(my_inputs, f ); - } - - // all methods on input ports should be called under mutual exclusion from join_node_base. - - bool tuple_build_may_succeed() { - return !ports_with_no_items; - } - - bool try_to_make_tuple(output_type &out) { - if(ports_with_no_items) return false; - return join_helper<N>::get_items(my_inputs, out); - } - - void tuple_accepted() { - reset_port_count(); - join_helper<N>::reset_ports(my_inputs); - } - void tuple_rejected() { - // nothing to do. - } - - input_type my_inputs; - base_node_type *my_node; - std::atomic<std::size_t> ports_with_no_items; - }; // join_node_FE<queueing, ...> - - // key_matching join front-end. - template<typename InputTuple, typename OutputTuple, typename K, typename KHash> - class join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> : public matching_forwarding_base<K>, - // buffer of key value counts - public hash_buffer< // typedefed below to key_to_count_buffer_type - typename std::decay<K>::type&, // force ref type on K - count_element<typename std::decay<K>::type>, - type_to_key_function_body< - count_element<typename std::decay<K>::type>, - typename std::decay<K>::type& >, - KHash >, - // buffer of output items - public item_buffer<OutputTuple> { - public: - static const int N = std::tuple_size<OutputTuple>::value; - typedef OutputTuple output_type; - typedef InputTuple input_type; - typedef K key_type; - typedef typename std::decay<key_type>::type unref_key_type; - typedef KHash key_hash_compare; - // must use K without ref. - typedef count_element<unref_key_type> count_element_type; - // method that lets us refer to the key of this type. 
- typedef key_to_count_functor<unref_key_type> key_to_count_func; - typedef type_to_key_function_body< count_element_type, unref_key_type&> TtoK_function_body_type; - typedef type_to_key_function_body_leaf<count_element_type, unref_key_type&, key_to_count_func> TtoK_function_body_leaf_type; - // this is the type of the special table that keeps track of the number of discrete - // elements corresponding to each key that we've seen. - typedef hash_buffer< unref_key_type&, count_element_type, TtoK_function_body_type, key_hash_compare > - key_to_count_buffer_type; - typedef item_buffer<output_type> output_buffer_type; - typedef join_node_base<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> base_node_type; // for forwarding - typedef matching_forwarding_base<key_type> forwarding_base_type; - -// ----------- Aggregator ------------ - // the aggregator is only needed to serialize the access to the hash table. - // and the output_buffer_type base class - private: - enum op_type { res_count, inc_count, may_succeed, try_make }; - typedef join_node_FE<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> class_type; - - class key_matching_FE_operation : public aggregated_operation<key_matching_FE_operation> { - public: - char type; - unref_key_type my_val; - output_type* my_output; - graph_task* bypass_t; - // constructor for value parameter - key_matching_FE_operation(const unref_key_type& e , op_type t) : type(char(t)), my_val(e), - my_output(nullptr), bypass_t(nullptr) {} - key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(nullptr) {} - // constructor with no parameter - key_matching_FE_operation(op_type t) : type(char(t)), my_output(nullptr), bypass_t(nullptr) {} - }; - - typedef aggregating_functor<class_type, key_matching_FE_operation> handler_type; - friend class aggregating_functor<class_type, key_matching_FE_operation>; - aggregator<handler_type, key_matching_FE_operation> my_aggregator; - - // called from aggregator, so serialized - // returns a task pointer if the a task would have been enqueued but we asked that - // it be returned. Otherwise returns nullptr. 
- graph_task* fill_output_buffer(unref_key_type &t) { - output_type l_out; - graph_task* rtask = nullptr; - bool do_fwd = this->buffer_empty() && is_graph_active(this->graph_ref); - this->current_key = t; - this->delete_with_key(this->current_key); // remove the key - if(join_helper<N>::get_items(my_inputs, l_out)) { // <== call back - this->push_back(l_out); - if(do_fwd) { // we enqueue if receiving an item from predecessor, not if successor asks for item - small_object_allocator allocator{}; - typedef forward_task_bypass<base_node_type> task_type; - rtask = allocator.new_object<task_type>(this->graph_ref, allocator, *my_node); - this->graph_ref.reserve_wait(); - do_fwd = false; - } - // retire the input values - join_helper<N>::reset_ports(my_inputs); // <== call back - } - else { - __TBB_ASSERT(false, "should have had something to push"); - } - return rtask; - } - - void handle_operations(key_matching_FE_operation* op_list) { - key_matching_FE_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case res_count: // called from BE - { - this->destroy_front(); - current->status.store( SUCCEEDED, std::memory_order_release); - } - break; - case inc_count: { // called from input ports - count_element_type *p = 0; - unref_key_type &t = current->my_val; - if(!(this->find_ref_with_key(t,p))) { - count_element_type ev; - ev.my_key = t; - ev.my_value = 0; - this->insert_with_key(ev); - bool found = this->find_ref_with_key(t, p); - __TBB_ASSERT_EX(found, "should find key after inserting it"); - } - if(++(p->my_value) == size_t(N)) { - current->bypass_t = fill_output_buffer(t); - } - } - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case may_succeed: // called from BE - current->status.store( this->buffer_empty() ? FAILED : SUCCEEDED, std::memory_order_release); - break; - case try_make: // called from BE - if(this->buffer_empty()) { - current->status.store( FAILED, std::memory_order_release); - } - else { - *(current->my_output) = this->front(); - current->status.store( SUCCEEDED, std::memory_order_release); - } - break; - } - } - } -// ------------ End Aggregator --------------- - - public: - template<typename FunctionTuple> - join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(nullptr) { - join_helper<N>::set_join_node_pointer(my_inputs, this); - join_helper<N>::set_key_functors(my_inputs, TtoK_funcs); - my_aggregator.initialize_handler(handler_type(this)); - TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); - this->set_key_func(cfb); - } - - join_node_FE(const join_node_FE& other) : forwarding_base_type((other.forwarding_base_type::graph_ref)), key_to_count_buffer_type(), - output_buffer_type() { - my_node = nullptr; - join_helper<N>::set_join_node_pointer(my_inputs, this); - join_helper<N>::copy_key_functors(my_inputs, const_cast<input_type &>(other.my_inputs)); - my_aggregator.initialize_handler(handler_type(this)); - TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); - this->set_key_func(cfb); - } - - // needed for forwarding - void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } - - void reset_port_count() { // called from BE - key_matching_FE_operation op_data(res_count); - my_aggregator.execute(&op_data); - return; - } - - // if all input_ports have items, spawn forward to try and consume tuples - // return a task if we are asked and did create one. 
- graph_task *increment_key_count(unref_key_type const & t) override { // called from input_ports - key_matching_FE_operation op_data(t, inc_count); - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - - input_type &input_ports() { return my_inputs; } - - protected: - - void reset( reset_flags f ) { - // called outside of parallel contexts - join_helper<N>::reset_inputs(my_inputs, f); - - key_to_count_buffer_type::reset(); - output_buffer_type::reset(); - } - - // all methods on input ports should be called under mutual exclusion from join_node_base. - - bool tuple_build_may_succeed() { // called from back-end - key_matching_FE_operation op_data(may_succeed); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - // cannot lock while calling back to input_ports. current_key will only be set - // and reset under the aggregator, so it will remain consistent. - bool try_to_make_tuple(output_type &out) { - key_matching_FE_operation op_data(&out,try_make); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - void tuple_accepted() { - reset_port_count(); // reset current_key after ports reset. - } - - void tuple_rejected() { - // nothing to do. - } - - input_type my_inputs; // input ports - base_node_type *my_node; - }; // join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> - - //! join_node_base - template<typename JP, typename InputTuple, typename OutputTuple> - class join_node_base : public graph_node, public join_node_FE<JP, InputTuple, OutputTuple>, - public sender<OutputTuple> { - protected: - using graph_node::my_graph; - public: - typedef OutputTuple output_type; - - typedef typename sender<output_type>::successor_type successor_type; - typedef join_node_FE<JP, InputTuple, OutputTuple> input_ports_type; - using input_ports_type::tuple_build_may_succeed; - using input_ports_type::try_to_make_tuple; - using input_ports_type::tuple_accepted; - using input_ports_type::tuple_rejected; - - private: - // ----------- Aggregator ------------ - enum op_type { reg_succ, rem_succ, try__get, do_fwrd, do_fwrd_bypass - }; - typedef join_node_base<JP,InputTuple,OutputTuple> class_type; - - class join_node_base_operation : public aggregated_operation<join_node_base_operation> { - public: - char type; - union { - output_type *my_arg; - successor_type *my_succ; - }; - graph_task* bypass_t; - join_node_base_operation(const output_type& e, op_type t) : type(char(t)), - my_arg(const_cast<output_type*>(&e)), bypass_t(nullptr) {} - join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), - my_succ(const_cast<successor_type *>(&s)), bypass_t(nullptr) {} - join_node_base_operation(op_type t) : type(char(t)), bypass_t(nullptr) {} - }; - - typedef aggregating_functor<class_type, join_node_base_operation> handler_type; - friend class aggregating_functor<class_type, join_node_base_operation>; - bool forwarder_busy; - aggregator<handler_type, join_node_base_operation> my_aggregator; - - void handle_operations(join_node_base_operation* op_list) { - join_node_base_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case reg_succ: { - my_successors.register_successor(*(current->my_succ)); - if(tuple_build_may_succeed() && !forwarder_busy && is_graph_active(my_graph)) { - small_object_allocator allocator{}; - typedef forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> > task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, 
*this); - my_graph.reserve_wait(); - spawn_in_graph_arena(my_graph, *t); - forwarder_busy = true; - } - current->status.store( SUCCEEDED, std::memory_order_release); - } - break; - case rem_succ: - my_successors.remove_successor(*(current->my_succ)); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case try__get: - if(tuple_build_may_succeed()) { - if(try_to_make_tuple(*(current->my_arg))) { - tuple_accepted(); - current->status.store( SUCCEEDED, std::memory_order_release); - } - else current->status.store( FAILED, std::memory_order_release); - } - else current->status.store( FAILED, std::memory_order_release); - break; - case do_fwrd_bypass: { - bool build_succeeded; - graph_task *last_task = nullptr; - output_type out; - // forwarding must be exclusive, because try_to_make_tuple and tuple_accepted - // are separate locked methods in the FE. We could conceivably fetch the front - // of the FE queue, then be swapped out, have someone else consume the FE's - // object, then come back, forward, and then try to remove it from the queue - // again. Without reservation of the FE, the methods accessing it must be locked. - // We could remember the keys of the objects we forwarded, and then remove - // them from the input ports after forwarding is complete? - if(tuple_build_may_succeed()) { // checks output queue of FE - do { - build_succeeded = try_to_make_tuple(out); // fetch front_end of queue - if(build_succeeded) { - graph_task *new_task = my_successors.try_put_task(out); - last_task = combine_tasks(my_graph, last_task, new_task); - if(new_task) { - tuple_accepted(); - } - else { - tuple_rejected(); - build_succeeded = false; - } - } - } while(build_succeeded); - } - current->bypass_t = last_task; - current->status.store( SUCCEEDED, std::memory_order_release); - forwarder_busy = false; - } - break; - } - } - } - // ---------- end aggregator ----------- - public: - join_node_base(graph &g) - : graph_node(g), input_ports_type(g), forwarder_busy(false), my_successors(this) - { - input_ports_type::set_my_node(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - join_node_base(const join_node_base& other) : - graph_node(other.graph_node::my_graph), input_ports_type(other), - sender<OutputTuple>(), forwarder_busy(false), my_successors(this) - { - input_ports_type::set_my_node(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - template<typename FunctionTuple> - join_node_base(graph &g, FunctionTuple f) - : graph_node(g), input_ports_type(g, f), forwarder_busy(false), my_successors(this) - { - input_ports_type::set_my_node(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - bool register_successor(successor_type &r) override { - join_node_base_operation op_data(r, reg_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - bool remove_successor( successor_type &r) override { - join_node_base_operation op_data(r, rem_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - bool try_get( output_type &v) override { - join_node_base_operation op_data(v, try__get); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - protected: - void reset_node(reset_flags f) override { - input_ports_type::reset(f); - if(f & rf_clear_edges) my_successors.clear(); - } - - private: - broadcast_cache<output_type, null_rw_mutex> my_successors; - - friend class forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> >; - graph_task 
*forward_task() { - join_node_base_operation op_data(do_fwrd_bypass); - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - - }; // join_node_base - - // join base class type generator - template<int N, template<class> class PT, typename OutputTuple, typename JP> - struct join_base { - typedef join_node_base<JP, typename wrap_tuple_elements<N,PT,OutputTuple>::type, OutputTuple> type; - }; - - template<int N, typename OutputTuple, typename K, typename KHash> - struct join_base<N, key_matching_port, OutputTuple, key_matching<K,KHash> > { - typedef key_matching<K, KHash> key_traits_type; - typedef K key_type; - typedef KHash key_hash_compare; - typedef join_node_base< key_traits_type, - // ports type - typename wrap_key_tuple_elements<N,key_matching_port,key_traits_type,OutputTuple>::type, - OutputTuple > type; - }; - - //! unfolded_join_node : passes input_ports_type to join_node_base. We build the input port type - // using tuple_element. The class PT is the port type (reserving_port, queueing_port, key_matching_port) - // and should match the typename. - - template<int N, template<class> class PT, typename OutputTuple, typename JP> - class unfolded_join_node : public join_base<N,PT,OutputTuple,JP>::type { - public: - typedef typename wrap_tuple_elements<N, PT, OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<JP, input_ports_type, output_type > base_type; - public: - unfolded_join_node(graph &g) : base_type(g) {} - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; - -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - template <typename K, typename T> - struct key_from_message_body { - K operator()(const T& t) const { - return key_from_message<K>(t); - } - }; - // Adds const to reference type - template <typename K, typename T> - struct key_from_message_body<K&,T> { - const K& operator()(const T& t) const { - return key_from_message<const K&>(t); - } - }; -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - // key_matching unfolded_join_node. This must be a separate specialization because the constructors - // differ. 
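  (For orientation, these key_matching specializations are what back the public
  tbb::flow::join_node when it is instantiated with a key_matching policy: the extra
  constructors accept one key-extractor body per input port. A minimal usage sketch
  follows; the struct names, variable names, and key values are illustrative only and
  are not taken from this file.)

  #include "oneapi/tbb/flow_graph.h"
  #include <string>
  #include <tuple>

  struct order   { int id; std::string item; };
  struct payment { int id; double amount; };

  int main() {
      using namespace tbb::flow;
      graph g;

      // One key-extractor body per input port; this is why the key_matching
      // specializations need constructors that the other join policies lack.
      join_node< std::tuple<order, payment>, key_matching<int> > j(
          g,
          [](const order& o)   { return o.id; },
          [](const payment& p) { return p.id; });

      // Feed each port through the input_port<N>() helper declared in this header.
      input_port<0>(j).try_put(order{42, "book"});
      input_port<1>(j).try_put(payment{42, 9.99});
      g.wait_for_all();

      // Both ports have seen key 42, so a joined tuple can be retrieved.
      std::tuple<order, payment> joined;
      return j.try_get(joined) ? 0 : 1;
  }
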
- - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<2,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<2,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - public: - typedef typename wrap_key_tuple_elements<2,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef std::tuple< f0_p, f1_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1> - unfolded_join_node(graph &g, Body0 body0, Body1 body1) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 2, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; - - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<3,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<3,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - public: - typedef typename wrap_key_tuple_elements<3,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef std::tuple< f0_p, f1_p, f2_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 3, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node 
&other) : base_type(other) {} - }; - - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<4,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<4,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - public: - typedef typename wrap_key_tuple_elements<4,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 4, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; - - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<5,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<5,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - public: - typedef typename wrap_key_tuple_elements<5,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - 
func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 5, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; - -#if __TBB_VARIADIC_MAX >= 6 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<6,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<6,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - public: - typedef typename wrap_key_tuple_elements<6,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, typename 
Body5> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5) - : base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 6, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - -#if __TBB_VARIADIC_MAX >= 7 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<7,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<7,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - typedef typename std::tuple_element<6, OutputTuple>::type T6; - public: - typedef typename wrap_key_tuple_elements<7,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef type_to_key_function_body<T6, K> *f6_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), - new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, - typename Body5, typename Body6> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, - Body5 body5, Body6 body6) : base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new 
type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5), - new type_to_key_function_body_leaf<T6, K, Body6>(body6) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 7, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - -#if __TBB_VARIADIC_MAX >= 8 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<8,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<8,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - typedef typename std::tuple_element<6, OutputTuple>::type T6; - typedef typename std::tuple_element<7, OutputTuple>::type T7; - public: - typedef typename wrap_key_tuple_elements<8,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef type_to_key_function_body<T6, K> *f6_p; - typedef type_to_key_function_body<T7, K> *f7_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), - new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), - new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, - typename Body5, typename Body6, typename Body7> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, - Body5 body5, Body6 body6, Body7 body7) : base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), 
- new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5), - new type_to_key_function_body_leaf<T6, K, Body6>(body6), - new type_to_key_function_body_leaf<T7, K, Body7>(body7) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 8, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - -#if __TBB_VARIADIC_MAX >= 9 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<9,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<9,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - typedef typename std::tuple_element<6, OutputTuple>::type T6; - typedef typename std::tuple_element<7, OutputTuple>::type T7; - typedef typename std::tuple_element<8, OutputTuple>::type T8; - public: - typedef typename wrap_key_tuple_elements<9,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef type_to_key_function_body<T6, K> *f6_p; - typedef type_to_key_function_body<T7, K> *f7_p; - typedef type_to_key_function_body<T8, K> *f8_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), - new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), - new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), - new type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, - typename Body5, typename Body6, typename Body7, typename Body8> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, - Body5 body5, Body6 body6, Body7 body7, Body8 body8) : 
base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5), - new type_to_key_function_body_leaf<T6, K, Body6>(body6), - new type_to_key_function_body_leaf<T7, K, Body7>(body7), - new type_to_key_function_body_leaf<T8, K, Body8>(body8) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 9, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - -#if __TBB_VARIADIC_MAX >= 10 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<10,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<10,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - typedef typename std::tuple_element<6, OutputTuple>::type T6; - typedef typename std::tuple_element<7, OutputTuple>::type T7; - typedef typename std::tuple_element<8, OutputTuple>::type T8; - typedef typename std::tuple_element<9, OutputTuple>::type T9; - public: - typedef typename wrap_key_tuple_elements<10,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef type_to_key_function_body<T6, K> *f6_p; - typedef type_to_key_function_body<T7, K> *f7_p; - typedef type_to_key_function_body<T8, K> *f8_p; - typedef type_to_key_function_body<T9, K> *f9_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p, f9_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), - new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), - new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), - new 
type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()), - new type_to_key_function_body_leaf<T9, K, key_from_message_body<K,T9> >(key_from_message_body<K,T9>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, - typename Body5, typename Body6, typename Body7, typename Body8, typename Body9> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, - Body5 body5, Body6 body6, Body7 body7, Body8 body8, Body9 body9) : base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5), - new type_to_key_function_body_leaf<T6, K, Body6>(body6), - new type_to_key_function_body_leaf<T7, K, Body7>(body7), - new type_to_key_function_body_leaf<T8, K, Body8>(body8), - new type_to_key_function_body_leaf<T9, K, Body9>(body9) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 10, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - - //! templated function to refer to input ports of the join node - template<size_t N, typename JNT> - typename std::tuple_element<N, typename JNT::input_ports_type>::type &input_port(JNT &jn) { - return std::get<N>(jn.input_ports()); - } - -#endif // __TBB__flow_graph_join_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_join_impl_H +#define __TBB__flow_graph_join_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included into namespace tbb::detail::d1 + + struct forwarding_base : no_assign { + forwarding_base(graph &g) : graph_ref(g) {} + virtual ~forwarding_base() {} + graph& graph_ref; + }; + + struct queueing_forwarding_base : forwarding_base { + using forwarding_base::forwarding_base; + // decrement_port_count may create a forwarding task. If we cannot handle the task + // ourselves, ask decrement_port_count to deal with it. + virtual graph_task* decrement_port_count(bool handle_task) = 0; + }; + + struct reserving_forwarding_base : forwarding_base { + using forwarding_base::forwarding_base; + // decrement_port_count may create a forwarding task. If we cannot handle the task + // ourselves, ask decrement_port_count to deal with it. + virtual graph_task* decrement_port_count() = 0; + virtual void increment_port_count() = 0; + }; + + // specialization that lets us keep a copy of the current_key for building results. + // KeyType can be a reference type. 
+ template<typename KeyType> + struct matching_forwarding_base : public forwarding_base { + typedef typename std::decay<KeyType>::type current_key_type; + matching_forwarding_base(graph &g) : forwarding_base(g) { } + virtual graph_task* increment_key_count(current_key_type const & /*t*/) = 0; + current_key_type current_key; // so ports can refer to FE's desired items + }; + + template< int N > + struct join_helper { + + template< typename TupleType, typename PortType > + static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { + std::get<N-1>( my_input ).set_join_node_pointer(port); + join_helper<N-1>::set_join_node_pointer( my_input, port ); + } + template< typename TupleType > + static inline void consume_reservations( TupleType &my_input ) { + std::get<N-1>( my_input ).consume(); + join_helper<N-1>::consume_reservations( my_input ); + } + + template< typename TupleType > + static inline void release_my_reservation( TupleType &my_input ) { + std::get<N-1>( my_input ).release(); + } + + template <typename TupleType> + static inline void release_reservations( TupleType &my_input) { + join_helper<N-1>::release_reservations(my_input); + release_my_reservation(my_input); + } + + template< typename InputTuple, typename OutputTuple > + static inline bool reserve( InputTuple &my_input, OutputTuple &out) { + if ( !std::get<N-1>( my_input ).reserve( std::get<N-1>( out ) ) ) return false; + if ( !join_helper<N-1>::reserve( my_input, out ) ) { + release_my_reservation( my_input ); + return false; + } + return true; + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { + bool res = std::get<N-1>(my_input).get_item(std::get<N-1>(out) ); // may fail + return join_helper<N-1>::get_my_item(my_input, out) && res; // do get on other inputs before returning + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_items(InputTuple &my_input, OutputTuple &out) { + return get_my_item(my_input, out); + } + + template<typename InputTuple> + static inline void reset_my_port(InputTuple &my_input) { + join_helper<N-1>::reset_my_port(my_input); + std::get<N-1>(my_input).reset_port(); + } + + template<typename InputTuple> + static inline void reset_ports(InputTuple& my_input) { + reset_my_port(my_input); + } + + template<typename InputTuple, typename KeyFuncTuple> + static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { + std::get<N-1>(my_input).set_my_key_func(std::get<N-1>(my_key_funcs)); + std::get<N-1>(my_key_funcs) = nullptr; + join_helper<N-1>::set_key_functors(my_input, my_key_funcs); + } + + template< typename KeyFuncTuple> + static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { + __TBB_ASSERT( + std::get<N-1>(other_inputs).get_my_key_func(), + "key matching join node should not be instantiated without functors." 
+ ); + std::get<N-1>(my_inputs).set_my_key_func(std::get<N-1>(other_inputs).get_my_key_func()->clone()); + join_helper<N-1>::copy_key_functors(my_inputs, other_inputs); + } + + template<typename InputTuple> + static inline void reset_inputs(InputTuple &my_input, reset_flags f) { + join_helper<N-1>::reset_inputs(my_input, f); + std::get<N-1>(my_input).reset_receiver(f); + } + }; // join_helper<N> + + template< > + struct join_helper<1> { + + template< typename TupleType, typename PortType > + static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { + std::get<0>( my_input ).set_join_node_pointer(port); + } + + template< typename TupleType > + static inline void consume_reservations( TupleType &my_input ) { + std::get<0>( my_input ).consume(); + } + + template< typename TupleType > + static inline void release_my_reservation( TupleType &my_input ) { + std::get<0>( my_input ).release(); + } + + template<typename TupleType> + static inline void release_reservations( TupleType &my_input) { + release_my_reservation(my_input); + } + + template< typename InputTuple, typename OutputTuple > + static inline bool reserve( InputTuple &my_input, OutputTuple &out) { + return std::get<0>( my_input ).reserve( std::get<0>( out ) ); + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { + return std::get<0>(my_input).get_item(std::get<0>(out)); + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_items(InputTuple &my_input, OutputTuple &out) { + return get_my_item(my_input, out); + } + + template<typename InputTuple> + static inline void reset_my_port(InputTuple &my_input) { + std::get<0>(my_input).reset_port(); + } + + template<typename InputTuple> + static inline void reset_ports(InputTuple& my_input) { + reset_my_port(my_input); + } + + template<typename InputTuple, typename KeyFuncTuple> + static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { + std::get<0>(my_input).set_my_key_func(std::get<0>(my_key_funcs)); + std::get<0>(my_key_funcs) = nullptr; + } + + template< typename KeyFuncTuple> + static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { + __TBB_ASSERT( + std::get<0>(other_inputs).get_my_key_func(), + "key matching join node should not be instantiated without functors." + ); + std::get<0>(my_inputs).set_my_key_func(std::get<0>(other_inputs).get_my_key_func()->clone()); + } + template<typename InputTuple> + static inline void reset_inputs(InputTuple &my_input, reset_flags f) { + std::get<0>(my_input).reset_receiver(f); + } + }; // join_helper<1> + + //! 
The two-phase join port + template< typename T > + class reserving_port : public receiver<T> { + public: + typedef T input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_pred, rem_pred, res_item, rel_res, con_res + }; + typedef reserving_port<T> class_type; + + class reserving_port_operation : public aggregated_operation<reserving_port_operation> { + public: + char type; + union { + T *my_arg; + predecessor_type *my_pred; + }; + reserving_port_operation(const T& e, op_type t) : + type(char(t)), my_arg(const_cast<T*>(&e)) {} + reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), + my_pred(const_cast<predecessor_type *>(&s)) {} + reserving_port_operation(op_type t) : type(char(t)) {} + }; + + typedef aggregating_functor<class_type, reserving_port_operation> handler_type; + friend class aggregating_functor<class_type, reserving_port_operation>; + aggregator<handler_type, reserving_port_operation> my_aggregator; + + void handle_operations(reserving_port_operation* op_list) { + reserving_port_operation *current; + bool was_missing_predecessors = false; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case reg_pred: + was_missing_predecessors = my_predecessors.empty(); + my_predecessors.add(*(current->my_pred)); + if ( was_missing_predecessors ) { + (void) my_join->decrement_port_count(); // may try to forward + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case rem_pred: + if ( !my_predecessors.empty() ) { + my_predecessors.remove(*(current->my_pred)); + if ( my_predecessors.empty() ) // was the last predecessor + my_join->increment_port_count(); + } + // TODO: consider returning failure if there were no predecessors to remove + current->status.store( SUCCEEDED, std::memory_order_release ); + break; + case res_item: + if ( reserved ) { + current->status.store( FAILED, std::memory_order_release); + } + else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) { + reserved = true; + current->status.store( SUCCEEDED, std::memory_order_release); + } else { + if ( my_predecessors.empty() ) { + my_join->increment_port_count(); + } + current->status.store( FAILED, std::memory_order_release); + } + break; + case rel_res: + reserved = false; + my_predecessors.try_release( ); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case con_res: + reserved = false; + my_predecessors.try_consume( ); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task( const T & ) override { + return nullptr; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + //! Constructor + reserving_port() : my_join(nullptr), my_predecessors(this), reserved(false) { + my_aggregator.initialize_handler(handler_type(this)); + } + + // copy constructor + reserving_port(const reserving_port& /* other */) = delete; + + void set_join_node_pointer(reserving_forwarding_base *join) { + my_join = join; + } + + //! 
Add a predecessor + bool register_predecessor( predecessor_type &src ) override { + reserving_port_operation op_data(src, reg_pred); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Remove a predecessor + bool remove_predecessor( predecessor_type &src ) override { + reserving_port_operation op_data(src, rem_pred); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Reserve an item from the port + bool reserve( T &v ) { + reserving_port_operation op_data(v, res_item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Release the port + void release( ) { + reserving_port_operation op_data(rel_res); + my_aggregator.execute(&op_data); + } + + //! Complete use of the port + void consume( ) { + reserving_port_operation op_data(con_res); + my_aggregator.execute(&op_data); + } + + void reset_receiver( reset_flags f) { + if(f & rf_clear_edges) my_predecessors.clear(); + else + my_predecessors.reset(); + reserved = false; + __TBB_ASSERT(!(f&rf_clear_edges) || my_predecessors.empty(), "port edges not removed"); + } + + private: +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + + reserving_forwarding_base *my_join; + reservable_predecessor_cache< T, null_mutex > my_predecessors; + bool reserved; + }; // reserving_port + + //! queueing join_port + template<typename T> + class queueing_port : public receiver<T>, public item_buffer<T> { + public: + typedef T input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef queueing_port<T> class_type; + + // ----------- Aggregator ------------ + private: + enum op_type { get__item, res_port, try__put_task + }; + + class queueing_port_operation : public aggregated_operation<queueing_port_operation> { + public: + char type; + T my_val; + T* my_arg; + graph_task* bypass_t; + // constructor for value parameter + queueing_port_operation(const T& e, op_type t) : + type(char(t)), my_val(e) + , bypass_t(nullptr) + {} + // constructor for pointer parameter + queueing_port_operation(const T* p, op_type t) : + type(char(t)), my_arg(const_cast<T*>(p)) + , bypass_t(nullptr) + {} + // constructor with no parameter + queueing_port_operation(op_type t) : type(char(t)) + , bypass_t(nullptr) + {} + }; + + typedef aggregating_functor<class_type, queueing_port_operation> handler_type; + friend class aggregating_functor<class_type, queueing_port_operation>; + aggregator<handler_type, queueing_port_operation> my_aggregator; + + void handle_operations(queueing_port_operation* op_list) { + queueing_port_operation *current; + bool was_empty; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case try__put_task: { + graph_task* rtask = nullptr; + was_empty = this->buffer_empty(); + this->push_back(current->my_val); + if (was_empty) rtask = my_join->decrement_port_count(false); + else + rtask = SUCCESSFULLY_ENQUEUED; + current->bypass_t = rtask; + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case get__item: + if(!this->buffer_empty()) { + *(current->my_arg) = this->front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + else { + current->status.store( FAILED, std::memory_order_release); + } + break; + case res_port: + __TBB_ASSERT(this->my_item_valid(this->my_head), "No item to reset"); + this->destroy_front(); + if(this->my_item_valid(this->my_head)) { + (void)my_join->decrement_port_count(true); + } + 
current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } + // ------------ End Aggregator --------------- + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const T &v) override { + queueing_port_operation op_data(v, try__put_task); + my_aggregator.execute(&op_data); + __TBB_ASSERT(op_data.status == SUCCEEDED || !op_data.bypass_t, "inconsistent return from aggregator"); + if(!op_data.bypass_t) return SUCCESSFULLY_ENQUEUED; + return op_data.bypass_t; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + //! Constructor + queueing_port() : item_buffer<T>() { + my_join = nullptr; + my_aggregator.initialize_handler(handler_type(this)); + } + + //! copy constructor + queueing_port(const queueing_port& /* other */) = delete; + + //! record parent for tallying available items + void set_join_node_pointer(queueing_forwarding_base *join) { + my_join = join; + } + + bool get_item( T &v ) { + queueing_port_operation op_data(&v, get__item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // reset_port is called when item is accepted by successor, but + // is initiated by join_node. + void reset_port() { + queueing_port_operation op_data(res_port); + my_aggregator.execute(&op_data); + return; + } + + void reset_receiver(reset_flags) { + item_buffer<T>::reset(); + } + + private: +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + + queueing_forwarding_base *my_join; + }; // queueing_port + +#include "_flow_graph_tagged_buffer_impl.h" + + template<typename K> + struct count_element { + K my_key; + size_t my_value; + }; + + // method to access the key in the counting table + // the ref has already been removed from K + template< typename K > + struct key_to_count_functor { + typedef count_element<K> table_item_type; + const K& operator()(const table_item_type& v) { return v.my_key; } + }; + + // the ports can have only one template parameter. 
We wrap the types needed in + // a traits type + template< class TraitsType > + class key_matching_port : + public receiver<typename TraitsType::T>, + public hash_buffer< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK, + typename TraitsType::KHash > { + public: + typedef TraitsType traits; + typedef key_matching_port<traits> class_type; + typedef typename TraitsType::T input_type; + typedef typename TraitsType::K key_type; + typedef typename std::decay<key_type>::type noref_key_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename TraitsType::TtoK type_to_key_func_type; + typedef typename TraitsType::KHash hash_compare_type; + typedef hash_buffer< key_type, input_type, type_to_key_func_type, hash_compare_type > buffer_type; + + private: +// ----------- Aggregator ------------ + private: + enum op_type { try__put, get__item, res_port + }; + + class key_matching_port_operation : public aggregated_operation<key_matching_port_operation> { + public: + char type; + input_type my_val; + input_type *my_arg; + // constructor for value parameter + key_matching_port_operation(const input_type& e, op_type t) : + type(char(t)), my_val(e) {} + // constructor for pointer parameter + key_matching_port_operation(const input_type* p, op_type t) : + type(char(t)), my_arg(const_cast<input_type*>(p)) {} + // constructor with no parameter + key_matching_port_operation(op_type t) : type(char(t)) {} + }; + + typedef aggregating_functor<class_type, key_matching_port_operation> handler_type; + friend class aggregating_functor<class_type, key_matching_port_operation>; + aggregator<handler_type, key_matching_port_operation> my_aggregator; + + void handle_operations(key_matching_port_operation* op_list) { + key_matching_port_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case try__put: { + bool was_inserted = this->insert_with_key(current->my_val); + // return failure if a duplicate insertion occurs + current->status.store( was_inserted ? 
SUCCEEDED : FAILED, std::memory_order_release); + } + break; + case get__item: + // use current_key from FE for item + if(!this->find_with_key(my_join->current_key, *(current->my_arg))) { + __TBB_ASSERT(false, "Failed to find item corresponding to current_key."); + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case res_port: + // use current_key from FE for item + this->delete_with_key(my_join->current_key); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } +// ------------ End Aggregator --------------- + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const input_type& v) override { + key_matching_port_operation op_data(v, try__put); + graph_task* rtask = nullptr; + my_aggregator.execute(&op_data); + if(op_data.status == SUCCEEDED) { + rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn + // rtask has to reflect the return status of the try_put + if(!rtask) rtask = SUCCESSFULLY_ENQUEUED; + } + return rtask; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + key_matching_port() : receiver<input_type>(), buffer_type() { + my_join = nullptr; + my_aggregator.initialize_handler(handler_type(this)); + } + + // copy constructor + key_matching_port(const key_matching_port& /*other*/) = delete; +#if __INTEL_COMPILER <= 2021 + // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited + // class while the parent class has the virtual keyword for the destrocutor. + virtual +#endif + ~key_matching_port() { } + + void set_join_node_pointer(forwarding_base *join) { + my_join = dynamic_cast<matching_forwarding_base<key_type>*>(join); + } + + void set_my_key_func(type_to_key_func_type *f) { this->set_key_func(f); } + + type_to_key_func_type* get_my_key_func() { return this->get_key_func(); } + + bool get_item( input_type &v ) { + // aggregator uses current_key from FE for Key + key_matching_port_operation op_data(&v, get__item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // reset_port is called when item is accepted by successor, but + // is initiated by join_node. + void reset_port() { + key_matching_port_operation op_data(res_port); + my_aggregator.execute(&op_data); + return; + } + + void reset_receiver(reset_flags ) { + buffer_type::reset(); + } + + private: + // my_join forwarding base used to count number of inputs that + // received key. + matching_forwarding_base<key_type> *my_join; + }; // key_matching_port + + using namespace graph_policy_namespace; + + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_base; + + //! 
join_node_FE : implements input port policy + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_FE; + + template<typename InputTuple, typename OutputTuple> + class join_node_FE<reserving, InputTuple, OutputTuple> : public reserving_forwarding_base { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef join_node_base<reserving, InputTuple, OutputTuple> base_node_type; // for forwarding + + join_node_FE(graph &g) : reserving_forwarding_base(g), my_node(nullptr) { + ports_with_no_inputs = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + join_node_FE(const join_node_FE& other) : reserving_forwarding_base((other.reserving_forwarding_base::graph_ref)), my_node(nullptr) { + ports_with_no_inputs = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void increment_port_count() override { + ++ports_with_no_inputs; + } + + // if all input_ports have predecessors, spawn forward to try and consume tuples + graph_task* decrement_port_count() override { + if(ports_with_no_inputs.fetch_sub(1) == 1) { + if(is_graph_active(this->graph_ref)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); + graph_ref.reserve_wait(); + spawn_in_graph_arena(this->graph_ref, *t); + } + } + return nullptr; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f) { + // called outside of parallel contexts + ports_with_no_inputs = N; + join_helper<N>::reset_inputs(my_inputs, f); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { + return !ports_with_no_inputs; + } + + bool try_to_make_tuple(output_type &out) { + if(ports_with_no_inputs) return false; + return join_helper<N>::reserve(my_inputs, out); + } + + void tuple_accepted() { + join_helper<N>::consume_reservations(my_inputs); + } + void tuple_rejected() { + join_helper<N>::release_reservations(my_inputs); + } + + input_type my_inputs; + base_node_type *my_node; + std::atomic<std::size_t> ports_with_no_inputs; + }; // join_node_FE<reserving, ... 
> + + template<typename InputTuple, typename OutputTuple> + class join_node_FE<queueing, InputTuple, OutputTuple> : public queueing_forwarding_base { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef join_node_base<queueing, InputTuple, OutputTuple> base_node_type; // for forwarding + + join_node_FE(graph &g) : queueing_forwarding_base(g), my_node(nullptr) { + ports_with_no_items = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + join_node_FE(const join_node_FE& other) : queueing_forwarding_base((other.queueing_forwarding_base::graph_ref)), my_node(nullptr) { + ports_with_no_items = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + // needed for forwarding + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void reset_port_count() { + ports_with_no_items = N; + } + + // if all input_ports have items, spawn forward to try and consume tuples + graph_task* decrement_port_count(bool handle_task) override + { + if(ports_with_no_items.fetch_sub(1) == 1) { + if(is_graph_active(this->graph_ref)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); + graph_ref.reserve_wait(); + if( !handle_task ) + return t; + spawn_in_graph_arena(this->graph_ref, *t); + } + } + return nullptr; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f) { + reset_port_count(); + join_helper<N>::reset_inputs(my_inputs, f ); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { + return !ports_with_no_items; + } + + bool try_to_make_tuple(output_type &out) { + if(ports_with_no_items) return false; + return join_helper<N>::get_items(my_inputs, out); + } + + void tuple_accepted() { + reset_port_count(); + join_helper<N>::reset_ports(my_inputs); + } + void tuple_rejected() { + // nothing to do. + } + + input_type my_inputs; + base_node_type *my_node; + std::atomic<std::size_t> ports_with_no_items; + }; // join_node_FE<queueing, ...> + + // key_matching join front-end. + template<typename InputTuple, typename OutputTuple, typename K, typename KHash> + class join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> : public matching_forwarding_base<K>, + // buffer of key value counts + public hash_buffer< // typedefed below to key_to_count_buffer_type + typename std::decay<K>::type&, // force ref type on K + count_element<typename std::decay<K>::type>, + type_to_key_function_body< + count_element<typename std::decay<K>::type>, + typename std::decay<K>::type& >, + KHash >, + // buffer of output items + public item_buffer<OutputTuple> { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef K key_type; + typedef typename std::decay<key_type>::type unref_key_type; + typedef KHash key_hash_compare; + // must use K without ref. + typedef count_element<unref_key_type> count_element_type; + // method that lets us refer to the key of this type. 
+ typedef key_to_count_functor<unref_key_type> key_to_count_func; + typedef type_to_key_function_body< count_element_type, unref_key_type&> TtoK_function_body_type; + typedef type_to_key_function_body_leaf<count_element_type, unref_key_type&, key_to_count_func> TtoK_function_body_leaf_type; + // this is the type of the special table that keeps track of the number of discrete + // elements corresponding to each key that we've seen. + typedef hash_buffer< unref_key_type&, count_element_type, TtoK_function_body_type, key_hash_compare > + key_to_count_buffer_type; + typedef item_buffer<output_type> output_buffer_type; + typedef join_node_base<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> base_node_type; // for forwarding + typedef matching_forwarding_base<key_type> forwarding_base_type; + +// ----------- Aggregator ------------ + // the aggregator is only needed to serialize the access to the hash table. + // and the output_buffer_type base class + private: + enum op_type { res_count, inc_count, may_succeed, try_make }; + typedef join_node_FE<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> class_type; + + class key_matching_FE_operation : public aggregated_operation<key_matching_FE_operation> { + public: + char type; + unref_key_type my_val; + output_type* my_output; + graph_task* bypass_t; + // constructor for value parameter + key_matching_FE_operation(const unref_key_type& e , op_type t) : type(char(t)), my_val(e), + my_output(nullptr), bypass_t(nullptr) {} + key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(nullptr) {} + // constructor with no parameter + key_matching_FE_operation(op_type t) : type(char(t)), my_output(nullptr), bypass_t(nullptr) {} + }; + + typedef aggregating_functor<class_type, key_matching_FE_operation> handler_type; + friend class aggregating_functor<class_type, key_matching_FE_operation>; + aggregator<handler_type, key_matching_FE_operation> my_aggregator; + + // called from aggregator, so serialized + // returns a task pointer if the a task would have been enqueued but we asked that + // it be returned. Otherwise returns nullptr. 
+ graph_task* fill_output_buffer(unref_key_type &t) { + output_type l_out; + graph_task* rtask = nullptr; + bool do_fwd = this->buffer_empty() && is_graph_active(this->graph_ref); + this->current_key = t; + this->delete_with_key(this->current_key); // remove the key + if(join_helper<N>::get_items(my_inputs, l_out)) { // <== call back + this->push_back(l_out); + if(do_fwd) { // we enqueue if receiving an item from predecessor, not if successor asks for item + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + rtask = allocator.new_object<task_type>(this->graph_ref, allocator, *my_node); + this->graph_ref.reserve_wait(); + do_fwd = false; + } + // retire the input values + join_helper<N>::reset_ports(my_inputs); // <== call back + } + else { + __TBB_ASSERT(false, "should have had something to push"); + } + return rtask; + } + + void handle_operations(key_matching_FE_operation* op_list) { + key_matching_FE_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case res_count: // called from BE + { + this->destroy_front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case inc_count: { // called from input ports + count_element_type *p = 0; + unref_key_type &t = current->my_val; + if(!(this->find_ref_with_key(t,p))) { + count_element_type ev; + ev.my_key = t; + ev.my_value = 0; + this->insert_with_key(ev); + bool found = this->find_ref_with_key(t, p); + __TBB_ASSERT_EX(found, "should find key after inserting it"); + } + if(++(p->my_value) == size_t(N)) { + current->bypass_t = fill_output_buffer(t); + } + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case may_succeed: // called from BE + current->status.store( this->buffer_empty() ? FAILED : SUCCEEDED, std::memory_order_release); + break; + case try_make: // called from BE + if(this->buffer_empty()) { + current->status.store( FAILED, std::memory_order_release); + } + else { + *(current->my_output) = this->front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + } + } + } +// ------------ End Aggregator --------------- + + public: + template<typename FunctionTuple> + join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(nullptr) { + join_helper<N>::set_join_node_pointer(my_inputs, this); + join_helper<N>::set_key_functors(my_inputs, TtoK_funcs); + my_aggregator.initialize_handler(handler_type(this)); + TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); + this->set_key_func(cfb); + } + + join_node_FE(const join_node_FE& other) : forwarding_base_type((other.forwarding_base_type::graph_ref)), key_to_count_buffer_type(), + output_buffer_type() { + my_node = nullptr; + join_helper<N>::set_join_node_pointer(my_inputs, this); + join_helper<N>::copy_key_functors(my_inputs, const_cast<input_type &>(other.my_inputs)); + my_aggregator.initialize_handler(handler_type(this)); + TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); + this->set_key_func(cfb); + } + + // needed for forwarding + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void reset_port_count() { // called from BE + key_matching_FE_operation op_data(res_count); + my_aggregator.execute(&op_data); + return; + } + + // if all input_ports have items, spawn forward to try and consume tuples + // return a task if we are asked and did create one. 
+ graph_task *increment_key_count(unref_key_type const & t) override { // called from input_ports + key_matching_FE_operation op_data(t, inc_count); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f ) { + // called outside of parallel contexts + join_helper<N>::reset_inputs(my_inputs, f); + + key_to_count_buffer_type::reset(); + output_buffer_type::reset(); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { // called from back-end + key_matching_FE_operation op_data(may_succeed); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // cannot lock while calling back to input_ports. current_key will only be set + // and reset under the aggregator, so it will remain consistent. + bool try_to_make_tuple(output_type &out) { + key_matching_FE_operation op_data(&out,try_make); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + void tuple_accepted() { + reset_port_count(); // reset current_key after ports reset. + } + + void tuple_rejected() { + // nothing to do. + } + + input_type my_inputs; // input ports + base_node_type *my_node; + }; // join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> + + //! join_node_base + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_base : public graph_node, public join_node_FE<JP, InputTuple, OutputTuple>, + public sender<OutputTuple> { + protected: + using graph_node::my_graph; + public: + typedef OutputTuple output_type; + + typedef typename sender<output_type>::successor_type successor_type; + typedef join_node_FE<JP, InputTuple, OutputTuple> input_ports_type; + using input_ports_type::tuple_build_may_succeed; + using input_ports_type::try_to_make_tuple; + using input_ports_type::tuple_accepted; + using input_ports_type::tuple_rejected; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_succ, rem_succ, try__get, do_fwrd, do_fwrd_bypass + }; + typedef join_node_base<JP,InputTuple,OutputTuple> class_type; + + class join_node_base_operation : public aggregated_operation<join_node_base_operation> { + public: + char type; + union { + output_type *my_arg; + successor_type *my_succ; + }; + graph_task* bypass_t; + join_node_base_operation(const output_type& e, op_type t) : type(char(t)), + my_arg(const_cast<output_type*>(&e)), bypass_t(nullptr) {} + join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), + my_succ(const_cast<successor_type *>(&s)), bypass_t(nullptr) {} + join_node_base_operation(op_type t) : type(char(t)), bypass_t(nullptr) {} + }; + + typedef aggregating_functor<class_type, join_node_base_operation> handler_type; + friend class aggregating_functor<class_type, join_node_base_operation>; + bool forwarder_busy; + aggregator<handler_type, join_node_base_operation> my_aggregator; + + void handle_operations(join_node_base_operation* op_list) { + join_node_base_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case reg_succ: { + my_successors.register_successor(*(current->my_succ)); + if(tuple_build_may_succeed() && !forwarder_busy && is_graph_active(my_graph)) { + small_object_allocator allocator{}; + typedef forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> > task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, 
*this); + my_graph.reserve_wait(); + spawn_in_graph_arena(my_graph, *t); + forwarder_busy = true; + } + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case rem_succ: + my_successors.remove_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case try__get: + if(tuple_build_may_succeed()) { + if(try_to_make_tuple(*(current->my_arg))) { + tuple_accepted(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + else current->status.store( FAILED, std::memory_order_release); + } + else current->status.store( FAILED, std::memory_order_release); + break; + case do_fwrd_bypass: { + bool build_succeeded; + graph_task *last_task = nullptr; + output_type out; + // forwarding must be exclusive, because try_to_make_tuple and tuple_accepted + // are separate locked methods in the FE. We could conceivably fetch the front + // of the FE queue, then be swapped out, have someone else consume the FE's + // object, then come back, forward, and then try to remove it from the queue + // again. Without reservation of the FE, the methods accessing it must be locked. + // We could remember the keys of the objects we forwarded, and then remove + // them from the input ports after forwarding is complete? + if(tuple_build_may_succeed()) { // checks output queue of FE + do { + build_succeeded = try_to_make_tuple(out); // fetch front_end of queue + if(build_succeeded) { + graph_task *new_task = my_successors.try_put_task(out); + last_task = combine_tasks(my_graph, last_task, new_task); + if(new_task) { + tuple_accepted(); + } + else { + tuple_rejected(); + build_succeeded = false; + } + } + } while(build_succeeded); + } + current->bypass_t = last_task; + current->status.store( SUCCEEDED, std::memory_order_release); + forwarder_busy = false; + } + break; + } + } + } + // ---------- end aggregator ----------- + public: + join_node_base(graph &g) + : graph_node(g), input_ports_type(g), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + join_node_base(const join_node_base& other) : + graph_node(other.graph_node::my_graph), input_ports_type(other), + sender<OutputTuple>(), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + template<typename FunctionTuple> + join_node_base(graph &g, FunctionTuple f) + : graph_node(g), input_ports_type(g, f), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + bool register_successor(successor_type &r) override { + join_node_base_operation op_data(r, reg_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool remove_successor( successor_type &r) override { + join_node_base_operation op_data(r, rem_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool try_get( output_type &v) override { + join_node_base_operation op_data(v, try__get); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + protected: + void reset_node(reset_flags f) override { + input_ports_type::reset(f); + if(f & rf_clear_edges) my_successors.clear(); + } + + private: + broadcast_cache<output_type, null_rw_mutex> my_successors; + + friend class forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> >; + graph_task 
*forward_task() { + join_node_base_operation op_data(do_fwrd_bypass); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + }; // join_node_base + + // join base class type generator + template<int N, template<class> class PT, typename OutputTuple, typename JP> + struct join_base { + typedef join_node_base<JP, typename wrap_tuple_elements<N,PT,OutputTuple>::type, OutputTuple> type; + }; + + template<int N, typename OutputTuple, typename K, typename KHash> + struct join_base<N, key_matching_port, OutputTuple, key_matching<K,KHash> > { + typedef key_matching<K, KHash> key_traits_type; + typedef K key_type; + typedef KHash key_hash_compare; + typedef join_node_base< key_traits_type, + // ports type + typename wrap_key_tuple_elements<N,key_matching_port,key_traits_type,OutputTuple>::type, + OutputTuple > type; + }; + + //! unfolded_join_node : passes input_ports_type to join_node_base. We build the input port type + // using tuple_element. The class PT is the port type (reserving_port, queueing_port, key_matching_port) + // and should match the typename. + + template<int N, template<class> class PT, typename OutputTuple, typename JP> + class unfolded_join_node : public join_base<N,PT,OutputTuple,JP>::type { + public: + typedef typename wrap_tuple_elements<N, PT, OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<JP, input_ports_type, output_type > base_type; + public: + unfolded_join_node(graph &g) : base_type(g) {} + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + template <typename K, typename T> + struct key_from_message_body { + K operator()(const T& t) const { + return key_from_message<K>(t); + } + }; + // Adds const to reference type + template <typename K, typename T> + struct key_from_message_body<K&,T> { + const K& operator()(const T& t) const { + return key_from_message<const K&>(t); + } + }; +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + // key_matching unfolded_join_node. This must be a separate specialization because the constructors + // differ. 
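For context, the key_matching constructors being specialized below are reached from user code through the public join_node API; the following is a minimal usage sketch, not part of this diff, in which the Order and Payment message types and their id key field are illustrative assumptions.

    #include <oneapi/tbb/flow_graph.h>
    #include <iostream>
    #include <tuple>

    struct Order   { int id; double amount; };
    struct Payment { int id; double paid; };

    int main() {
        using namespace oneapi::tbb::flow;
        graph g;

        // One key-extraction body per input port; each body ends up wrapped
        // in a type_to_key_function_body_leaf by the unfolded node below.
        join_node<std::tuple<Order, Payment>, key_matching<int>> join(
            g,
            [](const Order& o)   { return o.id; },
            [](const Payment& p) { return p.id; });

        function_node<std::tuple<Order, Payment>, continue_msg> sink(
            g, unlimited,
            [](const std::tuple<Order, Payment>& t) {
                std::cout << "matched id " << std::get<0>(t).id << "\n";
                return continue_msg{};
            });

        make_edge(join, sink);

        // input_port<N>() is the free function defined at the end of this header.
        input_port<0>(join).try_put(Order{7, 99.0});
        input_port<1>(join).try_put(Payment{7, 99.0});
        g.wait_for_all();   // the tuple for key 7 is forwarded once both ports hold it
        return 0;
    }

The number of lambdas passed to the constructor must equal the tuple size, which is exactly why a separate specialization per arity is generated below.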
+ + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<2,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<2,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + public: + typedef typename wrap_key_tuple_elements<2,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef std::tuple< f0_p, f1_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1> + unfolded_join_node(graph &g, Body0 body0, Body1 body1) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 2, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<3,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<3,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + public: + typedef typename wrap_key_tuple_elements<3,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef std::tuple< f0_p, f1_p, f2_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 3, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node 
&other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<4,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<4,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + public: + typedef typename wrap_key_tuple_elements<4,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 4, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<5,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<5,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + public: + typedef typename wrap_key_tuple_elements<5,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + 
func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 5, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + +#if __TBB_VARIADIC_MAX >= 6 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<6,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<6,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + public: + typedef typename wrap_key_tuple_elements<6,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, typename 
Body5> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5) + : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 6, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 7 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<7,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<7,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + public: + typedef typename wrap_key_tuple_elements<7,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new 
type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 7, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 8 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<8,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<8,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + public: + typedef typename wrap_key_tuple_elements<8,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), 
+ new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 8, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 9 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<9,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<9,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + typedef typename std::tuple_element<8, OutputTuple>::type T8; + public: + typedef typename wrap_key_tuple_elements<9,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef type_to_key_function_body<T8, K> *f8_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), + new type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7, typename Body8> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7, Body8 body8) : 
base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7), + new type_to_key_function_body_leaf<T8, K, Body8>(body8) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 9, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 10 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<10,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<10,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + typedef typename std::tuple_element<8, OutputTuple>::type T8; + typedef typename std::tuple_element<9, OutputTuple>::type T9; + public: + typedef typename wrap_key_tuple_elements<10,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef type_to_key_function_body<T8, K> *f8_p; + typedef type_to_key_function_body<T9, K> *f9_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p, f9_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), + new 
type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()), + new type_to_key_function_body_leaf<T9, K, key_from_message_body<K,T9> >(key_from_message_body<K,T9>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7, typename Body8, typename Body9> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7, Body8 body8, Body9 body9) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7), + new type_to_key_function_body_leaf<T8, K, Body8>(body8), + new type_to_key_function_body_leaf<T9, K, Body9>(body9) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 10, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + + //! templated function to refer to input ports of the join node + template<size_t N, typename JNT> + typename std::tuple_element<N, typename JNT::input_ports_type>::type &input_port(JNT &jn) { + return std::get<N>(jn.input_ports()); + } + +#endif // __TBB__flow_graph_join_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h index aca465d088..bf93efae5c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h @@ -1,769 +1,769 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_node_impl_H -#define __TBB__flow_graph_node_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "_flow_graph_item_buffer_impl.h" - -template< typename T, typename A > -class function_input_queue : public item_buffer<T,A> { -public: - bool empty() const { - return this->buffer_empty(); - } - - const T& front() const { - return this->item_buffer<T, A>::front(); - } - - void pop() { - this->destroy_front(); - } - - bool push( T& t ) { - return this->push_back( t ); - } -}; - -//! Input and scheduling for a function node that takes a type Input as input -// The only up-ref is apply_body_impl, which should implement the function -// call and any handling of the result. 
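[Editorial aside, not part of the diff above.] The unfolded_join_node specializations restored here are the implementation behind the public key_matching join_node: one user-supplied key function per tuple element (the Body0..BodyN constructor), with the body-less constructor guarded by __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING deriving keys from the messages themselves. A minimal usage sketch, assuming hypothetical Order/Shipment types with an int id field, might look like this:

// sketch only -- Order/Shipment and their id fields are hypothetical
#include <cstdio>
#include <tuple>
#include "oneapi/tbb/flow_graph.h"

struct Order    { int id; };
struct Shipment { int id; };

int main() {
    using namespace tbb::flow;
    graph g;

    // One key-extraction lambda per tuple element, mirroring the
    // Body0..BodyN constructor of unfolded_join_node above.
    join_node< std::tuple<Order, Shipment>, key_matching<int> > joiner(
        g,
        [](const Order& o)    { return o.id; },
        [](const Shipment& s) { return s.id; });

    function_node< std::tuple<Order, Shipment> > sink(
        g, unlimited,
        [](const std::tuple<Order, Shipment>& t) -> continue_msg {
            std::printf("matched key %d\n", std::get<0>(t).id);
            return continue_msg();
        });

    make_edge(joiner, sink);

    // input_port<N>() (defined at the end of the join implementation above)
    // exposes each typed input of the join node.
    input_port<0>(joiner).try_put(Order{42});
    input_port<1>(joiner).try_put(Shipment{42});

    g.wait_for_all();
    return 0;
}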
-template< typename Input, typename Policy, typename A, typename ImplType > -class function_input_base : public receiver<Input>, no_assign { - enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency - }; - typedef function_input_base<Input, Policy, A, ImplType> class_type; - -public: - - //! The input type of this receiver - typedef Input input_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type; - typedef function_input_queue<input_type, A> input_queue_type; - typedef typename allocator_traits<A>::template rebind_alloc<input_queue_type> allocator_type; - static_assert(!has_policy<queueing, Policy>::value || !has_policy<rejecting, Policy>::value, ""); - - //! Constructor for function_input_base - function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority ) - : my_graph_ref(g), my_max_concurrency(max_concurrency) - , my_concurrency(0), my_priority(a_priority) - , my_queue(!has_policy<rejecting, Policy>::value ? new input_queue_type() : NULL) - , my_predecessors(this) - , forwarder_busy(false) - { - my_aggregator.initialize_handler(handler_type(this)); - } - - //! Copy constructor - function_input_base( const function_input_base& src ) - : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority) {} - - //! Destructor - // The queue is allocated by the constructor for {multi}function_node. - // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead. - // This would be an interface-breaking change. - virtual ~function_input_base() { - if ( my_queue ) delete my_queue; - } - - graph_task* try_put_task( const input_type& t) override { - return try_put_task_impl(t, has_policy<lightweight, Policy>()); - } - - //! Adds src to the list of cached predecessors. - bool register_predecessor( predecessor_type &src ) override { - operation_type op_data(reg_pred); - op_data.r = &src; - my_aggregator.execute(&op_data); - return true; - } - - //! Removes src from the list of cached predecessors. 
- bool remove_predecessor( predecessor_type &src ) override { - operation_type op_data(rem_pred); - op_data.r = &src; - my_aggregator.execute(&op_data); - return true; - } - -protected: - - void reset_function_input_base( reset_flags f) { - my_concurrency = 0; - if(my_queue) { - my_queue->reset(); - } - reset_receiver(f); - forwarder_busy = false; - } - - graph& my_graph_ref; - const size_t my_max_concurrency; - size_t my_concurrency; - node_priority_t my_priority; - input_queue_type *my_queue; - predecessor_cache<input_type, null_mutex > my_predecessors; - - void reset_receiver( reset_flags f) { - if( f & rf_clear_edges) my_predecessors.clear(); - else - my_predecessors.reset(); - __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed"); - } - - graph& graph_reference() const override { - return my_graph_ref; - } - - graph_task* try_get_postponed_task(const input_type& i) { - operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - -private: - - friend class apply_body_task_bypass< class_type, input_type >; - friend class forward_task_bypass< class_type >; - - class operation_type : public aggregated_operation< operation_type > { - public: - char type; - union { - input_type *elem; - predecessor_type *r; - }; - graph_task* bypass_t; - operation_type(const input_type& e, op_type t) : - type(char(t)), elem(const_cast<input_type*>(&e)) {} - operation_type(op_type t) : type(char(t)), r(NULL) {} - }; - - bool forwarder_busy; - typedef aggregating_functor<class_type, operation_type> handler_type; - friend class aggregating_functor<class_type, operation_type>; - aggregator< handler_type, operation_type > my_aggregator; - - graph_task* perform_queued_requests() { - graph_task* new_task = NULL; - if(my_queue) { - if(!my_queue->empty()) { - ++my_concurrency; - new_task = create_body_task(my_queue->front()); - - my_queue->pop(); - } - } - else { - input_type i; - if(my_predecessors.get_item(i)) { - ++my_concurrency; - new_task = create_body_task(i); - } - } - return new_task; - } - void handle_operations(operation_type *op_list) { - operation_type* tmp; - while (op_list) { - tmp = op_list; - op_list = op_list->next; - switch (tmp->type) { - case reg_pred: - my_predecessors.add(*(tmp->r)); - tmp->status.store(SUCCEEDED, std::memory_order_release); - if (!forwarder_busy) { - forwarder_busy = true; - spawn_forward_task(); - } - break; - case rem_pred: - my_predecessors.remove(*(tmp->r)); - tmp->status.store(SUCCEEDED, std::memory_order_release); - break; - case app_body_bypass: { - tmp->bypass_t = NULL; - __TBB_ASSERT(my_max_concurrency != 0, NULL); - --my_concurrency; - if(my_concurrency<my_max_concurrency) - tmp->bypass_t = perform_queued_requests(); - tmp->status.store(SUCCEEDED, std::memory_order_release); - } - break; - case tryput_bypass: internal_try_put_task(tmp); break; - case try_fwd: internal_forward(tmp); break; - case occupy_concurrency: - if (my_concurrency < my_max_concurrency) { - ++my_concurrency; - tmp->status.store(SUCCEEDED, std::memory_order_release); - } else { - tmp->status.store(FAILED, std::memory_order_release); - } - break; - } - } - } - - //! 
Put to the node, but return the task instead of enqueueing it - void internal_try_put_task(operation_type *op) { - __TBB_ASSERT(my_max_concurrency != 0, NULL); - if (my_concurrency < my_max_concurrency) { - ++my_concurrency; - graph_task * new_task = create_body_task(*(op->elem)); - op->bypass_t = new_task; - op->status.store(SUCCEEDED, std::memory_order_release); - } else if ( my_queue && my_queue->push(*(op->elem)) ) { - op->bypass_t = SUCCESSFULLY_ENQUEUED; - op->status.store(SUCCEEDED, std::memory_order_release); - } else { - op->bypass_t = NULL; - op->status.store(FAILED, std::memory_order_release); - } - } - - //! Creates tasks for postponed messages if available and if concurrency allows - void internal_forward(operation_type *op) { - op->bypass_t = NULL; - if (my_concurrency < my_max_concurrency) - op->bypass_t = perform_queued_requests(); - if(op->bypass_t) - op->status.store(SUCCEEDED, std::memory_order_release); - else { - forwarder_busy = false; - op->status.store(FAILED, std::memory_order_release); - } - } - - graph_task* internal_try_put_bypass( const input_type& t ) { - operation_type op_data(t, tryput_bypass); - my_aggregator.execute(&op_data); - if( op_data.status == SUCCEEDED ) { - return op_data.bypass_t; - } - return NULL; - } - - graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) { - if( my_max_concurrency == 0 ) { - return apply_body_bypass(t); - } else { - operation_type check_op(t, occupy_concurrency); - my_aggregator.execute(&check_op); - if( check_op.status == SUCCEEDED ) { - return apply_body_bypass(t); - } - return internal_try_put_bypass(t); - } - } - - graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) { - if( my_max_concurrency == 0 ) { - return create_body_task(t); - } else { - return internal_try_put_bypass(t); - } - } - - //! Applies the body to the provided input - // then decides if more work is available - graph_task* apply_body_bypass( const input_type &i ) { - return static_cast<ImplType *>(this)->apply_body_impl_bypass(i); - } - - //! allocates a task to apply a body - graph_task* create_body_task( const input_type &input ) { - if (!is_graph_active(my_graph_ref)) { - return nullptr; - } - // TODO revamp: extract helper for common graph task allocation part - small_object_allocator allocator{}; - typedef apply_body_task_bypass<class_type, input_type> task_type; - graph_task* t = allocator.new_object<task_type>( my_graph_ref, allocator, *this, input, my_priority ); - graph_reference().reserve_wait(); - return t; - } - - //! This is executed by an enqueued task, the "forwarder" - graph_task* forward_task() { - operation_type op_data(try_fwd); - graph_task* rval = NULL; - do { - op_data.status = WAIT; - my_aggregator.execute(&op_data); - if(op_data.status == SUCCEEDED) { - graph_task* ttask = op_data.bypass_t; - __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, NULL ); - rval = combine_tasks(my_graph_ref, rval, ttask); - } - } while (op_data.status == SUCCEEDED); - return rval; - } - - inline graph_task* create_forward_task() { - if (!is_graph_active(my_graph_ref)) { - return nullptr; - } - small_object_allocator allocator{}; - typedef forward_task_bypass<class_type> task_type; - graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority ); - graph_reference().reserve_wait(); - return t; - } - - //! 
Spawns a task that calls forward() - inline void spawn_forward_task() { - graph_task* tp = create_forward_task(); - if(tp) { - spawn_in_graph_arena(graph_reference(), *tp); - } - } - - node_priority_t priority() const override { return my_priority; } -}; // function_input_base - -//! Implements methods for a function node that takes a type Input as input and sends -// a type Output to its successors. -template< typename Input, typename Output, typename Policy, typename A> -class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > { -public: - typedef Input input_type; - typedef Output output_type; - typedef function_body<input_type, output_type> function_body_type; - typedef function_input<Input, Output, Policy,A> my_class; - typedef function_input_base<Input, Policy, A, my_class> base_type; - typedef function_input_queue<input_type, A> input_queue_type; - - // constructor - template<typename Body> - function_input( - graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority ) - : base_type(g, max_concurrency, a_priority) - , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) - , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { - } - - //! Copy constructor - function_input( const function_input& src ) : - base_type(src), - my_body( src.my_init_body->clone() ), - my_init_body(src.my_init_body->clone() ) { - } -#if __INTEL_COMPILER <= 2021 - // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited - // class while the parent class has the virtual keyword for the destrocutor. - virtual -#endif - ~function_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - function_body_type &body_ref = *this->my_body; - return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); - } - - output_type apply_body_impl( const input_type& i) { - // There is an extra copied needed to capture the - // body execution without the try_put - fgt_begin_body( my_body ); - output_type v = (*my_body)(i); - fgt_end_body( my_body ); - return v; - } - - //TODO: consider moving into the base class - graph_task* apply_body_impl_bypass( const input_type &i) { - output_type v = apply_body_impl(i); - graph_task* postponed_task = NULL; - if( base_type::my_max_concurrency != 0 ) { - postponed_task = base_type::try_get_postponed_task(i); - __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, NULL ); - } - if( postponed_task ) { - // make the task available for other workers since we do not know successors' - // execution policy - spawn_in_graph_arena(base_type::graph_reference(), *postponed_task); - } - graph_task* successor_task = successors().try_put_task(v); -#if _MSC_VER && !__INTEL_COMPILER -#pragma warning (push) -#pragma warning (disable: 4127) /* suppress conditional expression is constant */ -#endif - if(has_policy<lightweight, Policy>::value) { -#if _MSC_VER && !__INTEL_COMPILER -#pragma warning (pop) -#endif - if(!successor_task) { - // Return confirmative status since current - // node's body has been executed anyway - successor_task = SUCCESSFULLY_ENQUEUED; - } - } - return successor_task; - } - -protected: - - void reset_function_input(reset_flags f) { - base_type::reset_function_input_base(f); - if(f & rf_reset_bodies) { - function_body_type *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - } - - function_body_type 
*my_body; - function_body_type *my_init_body; - virtual broadcast_cache<output_type > &successors() = 0; - -}; // function_input - - -// helper templates to clear the successor edges of the output ports of an multifunction_node -template<int N> struct clear_element { - template<typename P> static void clear_this(P &p) { - (void)std::get<N-1>(p).successors().clear(); - clear_element<N-1>::clear_this(p); - } -#if TBB_USE_ASSERT - template<typename P> static bool this_empty(P &p) { - if(std::get<N-1>(p).successors().empty()) - return clear_element<N-1>::this_empty(p); - return false; - } -#endif -}; - -template<> struct clear_element<1> { - template<typename P> static void clear_this(P &p) { - (void)std::get<0>(p).successors().clear(); - } -#if TBB_USE_ASSERT - template<typename P> static bool this_empty(P &p) { - return std::get<0>(p).successors().empty(); - } -#endif -}; - -template <typename OutputTuple> -struct init_output_ports { - template <typename... Args> - static OutputTuple call(graph& g, const std::tuple<Args...>&) { - return OutputTuple(Args(g)...); - } -}; // struct init_output_ports - -//! Implements methods for a function node that takes a type Input as input -// and has a tuple of output ports specified. -template< typename Input, typename OutputPortSet, typename Policy, typename A> -class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > { -public: - static const int N = std::tuple_size<OutputPortSet>::value; - typedef Input input_type; - typedef OutputPortSet output_ports_type; - typedef multifunction_body<input_type, output_ports_type> multifunction_body_type; - typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class; - typedef function_input_base<Input, Policy, A, my_class> base_type; - typedef function_input_queue<input_type, A> input_queue_type; - - // constructor - template<typename Body> - multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority ) - : base_type(g, max_concurrency, a_priority) - , my_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) - , my_init_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) - , my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){ - } - - //! Copy constructor - multifunction_input( const multifunction_input& src ) : - base_type(src), - my_body( src.my_init_body->clone() ), - my_init_body(src.my_init_body->clone() ), - my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) { - } - - ~multifunction_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - multifunction_body_type &body_ref = *this->my_body; - return *static_cast<Body*>(dynamic_cast< multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr()); - } - - // for multifunction nodes we do not have a single successor as such. So we just tell - // the task we were successful. - //TODO: consider moving common parts with implementation in function_input into separate function - graph_task* apply_body_impl_bypass( const input_type &i ) { - fgt_begin_body( my_body ); - (*my_body)(i, my_output_ports); - fgt_end_body( my_body ); - graph_task* ttask = NULL; - if(base_type::my_max_concurrency != 0) { - ttask = base_type::try_get_postponed_task(i); - } - return ttask ? 
ttask : SUCCESSFULLY_ENQUEUED; - } - - output_ports_type &output_ports(){ return my_output_ports; } - -protected: - - void reset(reset_flags f) { - base_type::reset_function_input_base(f); - if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports); - if(f & rf_reset_bodies) { - multifunction_body_type* tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed"); - } - - multifunction_body_type *my_body; - multifunction_body_type *my_init_body; - output_ports_type my_output_ports; - -}; // multifunction_input - -// template to refer to an output port of a multifunction_node -template<size_t N, typename MOP> -typename std::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) { - return std::get<N>(op.output_ports()); -} - -inline void check_task_and_spawn(graph& g, graph_task* t) { - if (t && t != SUCCESSFULLY_ENQUEUED) { - spawn_in_graph_arena(g, *t); - } -} - -// helper structs for split_node -template<int N> -struct emit_element { - template<typename T, typename P> - static graph_task* emit_this(graph& g, const T &t, P &p) { - // TODO: consider to collect all the tasks in task_list and spawn them all at once - graph_task* last_task = std::get<N-1>(p).try_put_task(std::get<N-1>(t)); - check_task_and_spawn(g, last_task); - return emit_element<N-1>::emit_this(g,t,p); - } -}; - -template<> -struct emit_element<1> { - template<typename T, typename P> - static graph_task* emit_this(graph& g, const T &t, P &p) { - graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t)); - check_task_and_spawn(g, last_task); - return SUCCESSFULLY_ENQUEUED; - } -}; - -//! Implements methods for an executable node that takes continue_msg as input -template< typename Output, typename Policy> -class continue_input : public continue_receiver { -public: - - //! The input type of this receiver - typedef continue_msg input_type; - - //! 
The output type of this receiver - typedef Output output_type; - typedef function_body<input_type, output_type> function_body_type; - typedef continue_input<output_type, Policy> class_type; - - template< typename Body > - continue_input( graph &g, Body& body, node_priority_t a_priority ) - : continue_receiver(/*number_of_predecessors=*/0, a_priority) - , my_graph_ref(g) - , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) - , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) - { } - - template< typename Body > - continue_input( graph &g, int number_of_predecessors, - Body& body, node_priority_t a_priority ) - : continue_receiver( number_of_predecessors, a_priority ) - , my_graph_ref(g) - , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) - , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) - { } - - continue_input( const continue_input& src ) : continue_receiver(src), - my_graph_ref(src.my_graph_ref), - my_body( src.my_init_body->clone() ), - my_init_body( src.my_init_body->clone() ) {} - - ~continue_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - function_body_type &body_ref = *my_body; - return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); - } - - void reset_receiver( reset_flags f) override { - continue_receiver::reset_receiver(f); - if(f & rf_reset_bodies) { - function_body_type *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - } - -protected: - - graph& my_graph_ref; - function_body_type *my_body; - function_body_type *my_init_body; - - virtual broadcast_cache<output_type > &successors() = 0; - - friend class apply_body_task_bypass< class_type, continue_msg >; - - //! Applies the body to the provided input - graph_task* apply_body_bypass( input_type ) { - // There is an extra copied needed to capture the - // body execution without the try_put - fgt_begin_body( my_body ); - output_type v = (*my_body)( continue_msg() ); - fgt_end_body( my_body ); - return successors().try_put_task( v ); - } - - graph_task* execute() override { - if(!is_graph_active(my_graph_ref)) { - return NULL; - } -#if _MSC_VER && !__INTEL_COMPILER -#pragma warning (push) -#pragma warning (disable: 4127) /* suppress conditional expression is constant */ -#endif - if(has_policy<lightweight, Policy>::value) { -#if _MSC_VER && !__INTEL_COMPILER -#pragma warning (pop) -#endif - return apply_body_bypass( continue_msg() ); - } - else { - small_object_allocator allocator{}; - typedef apply_body_task_bypass<class_type, continue_msg> task_type; - graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority ); - graph_reference().reserve_wait(); - return t; - } - } - - graph& graph_reference() const override { - return my_graph_ref; - } -}; // continue_input - -//! Implements methods for both executable and function nodes that puts Output to its successors -template< typename Output > -class function_output : public sender<Output> { -public: - - template<int N> friend struct clear_element; - typedef Output output_type; - typedef typename sender<output_type>::successor_type successor_type; - typedef broadcast_cache<output_type> broadcast_cache_type; - - function_output(graph& g) : my_successors(this), my_graph_ref(g) {} - function_output(const function_output& other) = delete; - - //! 
Adds a new successor to this node - bool register_successor( successor_type &r ) override { - successors().register_successor( r ); - return true; - } - - //! Removes a successor from this node - bool remove_successor( successor_type &r ) override { - successors().remove_successor( r ); - return true; - } - - broadcast_cache_type &successors() { return my_successors; } - - graph& graph_reference() const { return my_graph_ref; } -protected: - broadcast_cache_type my_successors; - graph& my_graph_ref; -}; // function_output - -template< typename Output > -class multifunction_output : public function_output<Output> { -public: - typedef Output output_type; - typedef function_output<output_type> base_type; - using base_type::my_successors; - - multifunction_output(graph& g) : base_type(g) {} - multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {} - - bool try_put(const output_type &i) { - graph_task *res = try_put_task(i); - if( !res ) return false; - if( res != SUCCESSFULLY_ENQUEUED ) { - // wrapping in task_arena::execute() is not needed since the method is called from - // inside task::execute() - spawn_in_graph_arena(graph_reference(), *res); - } - return true; - } - - using base_type::graph_reference; - -protected: - - graph_task* try_put_task(const output_type &i) { - return my_successors.try_put_task(i); - } - - template <int N> friend struct emit_element; - -}; // multifunction_output - -//composite_node -template<typename CompositeType> -void add_nodes_impl(CompositeType*, bool) {} - -template< typename CompositeType, typename NodeType1, typename... NodeTypes > -void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) { - void *addr = const_cast<NodeType1 *>(&n1); - - fgt_alias_port(c_node, addr, visible); - add_nodes_impl(c_node, visible, n...); -} - -#endif // __TBB__flow_graph_node_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_node_impl_H +#define __TBB__flow_graph_node_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_flow_graph_item_buffer_impl.h" + +template< typename T, typename A > +class function_input_queue : public item_buffer<T,A> { +public: + bool empty() const { + return this->buffer_empty(); + } + + const T& front() const { + return this->item_buffer<T, A>::front(); + } + + void pop() { + this->destroy_front(); + } + + bool push( T& t ) { + return this->push_back( t ); + } +}; + +//! Input and scheduling for a function node that takes a type Input as input +// The only up-ref is apply_body_impl, which should implement the function +// call and any handling of the result. 
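[Editorial aside, not part of the diff.] The multifunction_input / multifunction_output plumbing in this header is what the public multifunction_node and the output_port<N> helper are built on: the body writes to the tuple of output ports, and each std::get<N>(ports).try_put(...) goes through multifunction_output. A minimal sketch, assuming a hypothetical even/odd splitter:

// sketch only -- the even/odd split and sink bodies are hypothetical
#include <cstdio>
#include <tuple>
#include "oneapi/tbb/flow_graph.h"

int main() {
    using namespace tbb::flow;
    graph g;

    using splitter_t = multifunction_node<int, std::tuple<int, int>>;

    // The body receives the input and the tuple of output ports.
    splitter_t splitter(g, unlimited,
        [](const int& v, splitter_t::output_ports_type& ports) {
            if (v % 2 == 0) std::get<0>(ports).try_put(v);
            else            std::get<1>(ports).try_put(v);
        });

    function_node<int> even_sink(g, serial, [](const int& v) -> continue_msg {
        std::printf("even: %d\n", v); return continue_msg();
    });
    function_node<int> odd_sink(g, serial, [](const int& v) -> continue_msg {
        std::printf("odd: %d\n", v); return continue_msg();
    });

    // output_port<N>() (defined in this header) selects the N-th port
    // when building edges.
    make_edge(output_port<0>(splitter), even_sink);
    make_edge(output_port<1>(splitter), odd_sink);

    for (int i = 0; i < 6; ++i) splitter.try_put(i);
    g.wait_for_all();
    return 0;
}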
+template< typename Input, typename Policy, typename A, typename ImplType > +class function_input_base : public receiver<Input>, no_assign { + enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency + }; + typedef function_input_base<Input, Policy, A, ImplType> class_type; + +public: + + //! The input type of this receiver + typedef Input input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type; + typedef function_input_queue<input_type, A> input_queue_type; + typedef typename allocator_traits<A>::template rebind_alloc<input_queue_type> allocator_type; + static_assert(!has_policy<queueing, Policy>::value || !has_policy<rejecting, Policy>::value, ""); + + //! Constructor for function_input_base + function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority ) + : my_graph_ref(g), my_max_concurrency(max_concurrency) + , my_concurrency(0), my_priority(a_priority) + , my_queue(!has_policy<rejecting, Policy>::value ? new input_queue_type() : NULL) + , my_predecessors(this) + , forwarder_busy(false) + { + my_aggregator.initialize_handler(handler_type(this)); + } + + //! Copy constructor + function_input_base( const function_input_base& src ) + : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority) {} + + //! Destructor + // The queue is allocated by the constructor for {multi}function_node. + // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead. + // This would be an interface-breaking change. + virtual ~function_input_base() { + if ( my_queue ) delete my_queue; + } + + graph_task* try_put_task( const input_type& t) override { + return try_put_task_impl(t, has_policy<lightweight, Policy>()); + } + + //! Adds src to the list of cached predecessors. + bool register_predecessor( predecessor_type &src ) override { + operation_type op_data(reg_pred); + op_data.r = &src; + my_aggregator.execute(&op_data); + return true; + } + + //! Removes src from the list of cached predecessors. 
+ bool remove_predecessor( predecessor_type &src ) override { + operation_type op_data(rem_pred); + op_data.r = &src; + my_aggregator.execute(&op_data); + return true; + } + +protected: + + void reset_function_input_base( reset_flags f) { + my_concurrency = 0; + if(my_queue) { + my_queue->reset(); + } + reset_receiver(f); + forwarder_busy = false; + } + + graph& my_graph_ref; + const size_t my_max_concurrency; + size_t my_concurrency; + node_priority_t my_priority; + input_queue_type *my_queue; + predecessor_cache<input_type, null_mutex > my_predecessors; + + void reset_receiver( reset_flags f) { + if( f & rf_clear_edges) my_predecessors.clear(); + else + my_predecessors.reset(); + __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed"); + } + + graph& graph_reference() const override { + return my_graph_ref; + } + + graph_task* try_get_postponed_task(const input_type& i) { + operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + +private: + + friend class apply_body_task_bypass< class_type, input_type >; + friend class forward_task_bypass< class_type >; + + class operation_type : public aggregated_operation< operation_type > { + public: + char type; + union { + input_type *elem; + predecessor_type *r; + }; + graph_task* bypass_t; + operation_type(const input_type& e, op_type t) : + type(char(t)), elem(const_cast<input_type*>(&e)) {} + operation_type(op_type t) : type(char(t)), r(NULL) {} + }; + + bool forwarder_busy; + typedef aggregating_functor<class_type, operation_type> handler_type; + friend class aggregating_functor<class_type, operation_type>; + aggregator< handler_type, operation_type > my_aggregator; + + graph_task* perform_queued_requests() { + graph_task* new_task = NULL; + if(my_queue) { + if(!my_queue->empty()) { + ++my_concurrency; + new_task = create_body_task(my_queue->front()); + + my_queue->pop(); + } + } + else { + input_type i; + if(my_predecessors.get_item(i)) { + ++my_concurrency; + new_task = create_body_task(i); + } + } + return new_task; + } + void handle_operations(operation_type *op_list) { + operation_type* tmp; + while (op_list) { + tmp = op_list; + op_list = op_list->next; + switch (tmp->type) { + case reg_pred: + my_predecessors.add(*(tmp->r)); + tmp->status.store(SUCCEEDED, std::memory_order_release); + if (!forwarder_busy) { + forwarder_busy = true; + spawn_forward_task(); + } + break; + case rem_pred: + my_predecessors.remove(*(tmp->r)); + tmp->status.store(SUCCEEDED, std::memory_order_release); + break; + case app_body_bypass: { + tmp->bypass_t = NULL; + __TBB_ASSERT(my_max_concurrency != 0, NULL); + --my_concurrency; + if(my_concurrency<my_max_concurrency) + tmp->bypass_t = perform_queued_requests(); + tmp->status.store(SUCCEEDED, std::memory_order_release); + } + break; + case tryput_bypass: internal_try_put_task(tmp); break; + case try_fwd: internal_forward(tmp); break; + case occupy_concurrency: + if (my_concurrency < my_max_concurrency) { + ++my_concurrency; + tmp->status.store(SUCCEEDED, std::memory_order_release); + } else { + tmp->status.store(FAILED, std::memory_order_release); + } + break; + } + } + } + + //! 
Put to the node, but return the task instead of enqueueing it + void internal_try_put_task(operation_type *op) { + __TBB_ASSERT(my_max_concurrency != 0, NULL); + if (my_concurrency < my_max_concurrency) { + ++my_concurrency; + graph_task * new_task = create_body_task(*(op->elem)); + op->bypass_t = new_task; + op->status.store(SUCCEEDED, std::memory_order_release); + } else if ( my_queue && my_queue->push(*(op->elem)) ) { + op->bypass_t = SUCCESSFULLY_ENQUEUED; + op->status.store(SUCCEEDED, std::memory_order_release); + } else { + op->bypass_t = NULL; + op->status.store(FAILED, std::memory_order_release); + } + } + + //! Creates tasks for postponed messages if available and if concurrency allows + void internal_forward(operation_type *op) { + op->bypass_t = NULL; + if (my_concurrency < my_max_concurrency) + op->bypass_t = perform_queued_requests(); + if(op->bypass_t) + op->status.store(SUCCEEDED, std::memory_order_release); + else { + forwarder_busy = false; + op->status.store(FAILED, std::memory_order_release); + } + } + + graph_task* internal_try_put_bypass( const input_type& t ) { + operation_type op_data(t, tryput_bypass); + my_aggregator.execute(&op_data); + if( op_data.status == SUCCEEDED ) { + return op_data.bypass_t; + } + return NULL; + } + + graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) { + if( my_max_concurrency == 0 ) { + return apply_body_bypass(t); + } else { + operation_type check_op(t, occupy_concurrency); + my_aggregator.execute(&check_op); + if( check_op.status == SUCCEEDED ) { + return apply_body_bypass(t); + } + return internal_try_put_bypass(t); + } + } + + graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) { + if( my_max_concurrency == 0 ) { + return create_body_task(t); + } else { + return internal_try_put_bypass(t); + } + } + + //! Applies the body to the provided input + // then decides if more work is available + graph_task* apply_body_bypass( const input_type &i ) { + return static_cast<ImplType *>(this)->apply_body_impl_bypass(i); + } + + //! allocates a task to apply a body + graph_task* create_body_task( const input_type &input ) { + if (!is_graph_active(my_graph_ref)) { + return nullptr; + } + // TODO revamp: extract helper for common graph task allocation part + small_object_allocator allocator{}; + typedef apply_body_task_bypass<class_type, input_type> task_type; + graph_task* t = allocator.new_object<task_type>( my_graph_ref, allocator, *this, input, my_priority ); + graph_reference().reserve_wait(); + return t; + } + + //! This is executed by an enqueued task, the "forwarder" + graph_task* forward_task() { + operation_type op_data(try_fwd); + graph_task* rval = NULL; + do { + op_data.status = WAIT; + my_aggregator.execute(&op_data); + if(op_data.status == SUCCEEDED) { + graph_task* ttask = op_data.bypass_t; + __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, NULL ); + rval = combine_tasks(my_graph_ref, rval, ttask); + } + } while (op_data.status == SUCCEEDED); + return rval; + } + + inline graph_task* create_forward_task() { + if (!is_graph_active(my_graph_ref)) { + return nullptr; + } + small_object_allocator allocator{}; + typedef forward_task_bypass<class_type> task_type; + graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority ); + graph_reference().reserve_wait(); + return t; + } + + //! 
Spawns a task that calls forward() + inline void spawn_forward_task() { + graph_task* tp = create_forward_task(); + if(tp) { + spawn_in_graph_arena(graph_reference(), *tp); + } + } + + node_priority_t priority() const override { return my_priority; } +}; // function_input_base + +//! Implements methods for a function node that takes a type Input as input and sends +// a type Output to its successors. +template< typename Input, typename Output, typename Policy, typename A> +class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > { +public: + typedef Input input_type; + typedef Output output_type; + typedef function_body<input_type, output_type> function_body_type; + typedef function_input<Input, Output, Policy,A> my_class; + typedef function_input_base<Input, Policy, A, my_class> base_type; + typedef function_input_queue<input_type, A> input_queue_type; + + // constructor + template<typename Body> + function_input( + graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority ) + : base_type(g, max_concurrency, a_priority) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { + } + + //! Copy constructor + function_input( const function_input& src ) : + base_type(src), + my_body( src.my_init_body->clone() ), + my_init_body(src.my_init_body->clone() ) { + } +#if __INTEL_COMPILER <= 2021 + // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited + // class while the parent class has the virtual keyword for the destrocutor. + virtual +#endif + ~function_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + function_body_type &body_ref = *this->my_body; + return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); + } + + output_type apply_body_impl( const input_type& i) { + // There is an extra copied needed to capture the + // body execution without the try_put + fgt_begin_body( my_body ); + output_type v = (*my_body)(i); + fgt_end_body( my_body ); + return v; + } + + //TODO: consider moving into the base class + graph_task* apply_body_impl_bypass( const input_type &i) { + output_type v = apply_body_impl(i); + graph_task* postponed_task = NULL; + if( base_type::my_max_concurrency != 0 ) { + postponed_task = base_type::try_get_postponed_task(i); + __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, NULL ); + } + if( postponed_task ) { + // make the task available for other workers since we do not know successors' + // execution policy + spawn_in_graph_arena(base_type::graph_reference(), *postponed_task); + } + graph_task* successor_task = successors().try_put_task(v); +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (push) +#pragma warning (disable: 4127) /* suppress conditional expression is constant */ +#endif + if(has_policy<lightweight, Policy>::value) { +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (pop) +#endif + if(!successor_task) { + // Return confirmative status since current + // node's body has been executed anyway + successor_task = SUCCESSFULLY_ENQUEUED; + } + } + return successor_task; + } + +protected: + + void reset_function_input(reset_flags f) { + base_type::reset_function_input_base(f); + if(f & rf_reset_bodies) { + function_body_type *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + + function_body_type 
*my_body; + function_body_type *my_init_body; + virtual broadcast_cache<output_type > &successors() = 0; + +}; // function_input + + +// helper templates to clear the successor edges of the output ports of an multifunction_node +template<int N> struct clear_element { + template<typename P> static void clear_this(P &p) { + (void)std::get<N-1>(p).successors().clear(); + clear_element<N-1>::clear_this(p); + } +#if TBB_USE_ASSERT + template<typename P> static bool this_empty(P &p) { + if(std::get<N-1>(p).successors().empty()) + return clear_element<N-1>::this_empty(p); + return false; + } +#endif +}; + +template<> struct clear_element<1> { + template<typename P> static void clear_this(P &p) { + (void)std::get<0>(p).successors().clear(); + } +#if TBB_USE_ASSERT + template<typename P> static bool this_empty(P &p) { + return std::get<0>(p).successors().empty(); + } +#endif +}; + +template <typename OutputTuple> +struct init_output_ports { + template <typename... Args> + static OutputTuple call(graph& g, const std::tuple<Args...>&) { + return OutputTuple(Args(g)...); + } +}; // struct init_output_ports + +//! Implements methods for a function node that takes a type Input as input +// and has a tuple of output ports specified. +template< typename Input, typename OutputPortSet, typename Policy, typename A> +class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > { +public: + static const int N = std::tuple_size<OutputPortSet>::value; + typedef Input input_type; + typedef OutputPortSet output_ports_type; + typedef multifunction_body<input_type, output_ports_type> multifunction_body_type; + typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class; + typedef function_input_base<Input, Policy, A, my_class> base_type; + typedef function_input_queue<input_type, A> input_queue_type; + + // constructor + template<typename Body> + multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority ) + : base_type(g, max_concurrency, a_priority) + , my_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) + , my_init_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) + , my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){ + } + + //! Copy constructor + multifunction_input( const multifunction_input& src ) : + base_type(src), + my_body( src.my_init_body->clone() ), + my_init_body(src.my_init_body->clone() ), + my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) { + } + + ~multifunction_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + multifunction_body_type &body_ref = *this->my_body; + return *static_cast<Body*>(dynamic_cast< multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr()); + } + + // for multifunction nodes we do not have a single successor as such. So we just tell + // the task we were successful. + //TODO: consider moving common parts with implementation in function_input into separate function + graph_task* apply_body_impl_bypass( const input_type &i ) { + fgt_begin_body( my_body ); + (*my_body)(i, my_output_ports); + fgt_end_body( my_body ); + graph_task* ttask = NULL; + if(base_type::my_max_concurrency != 0) { + ttask = base_type::try_get_postponed_task(i); + } + return ttask ? 
ttask : SUCCESSFULLY_ENQUEUED; + } + + output_ports_type &output_ports(){ return my_output_ports; } + +protected: + + void reset(reset_flags f) { + base_type::reset_function_input_base(f); + if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports); + if(f & rf_reset_bodies) { + multifunction_body_type* tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed"); + } + + multifunction_body_type *my_body; + multifunction_body_type *my_init_body; + output_ports_type my_output_ports; + +}; // multifunction_input + +// template to refer to an output port of a multifunction_node +template<size_t N, typename MOP> +typename std::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) { + return std::get<N>(op.output_ports()); +} + +inline void check_task_and_spawn(graph& g, graph_task* t) { + if (t && t != SUCCESSFULLY_ENQUEUED) { + spawn_in_graph_arena(g, *t); + } +} + +// helper structs for split_node +template<int N> +struct emit_element { + template<typename T, typename P> + static graph_task* emit_this(graph& g, const T &t, P &p) { + // TODO: consider to collect all the tasks in task_list and spawn them all at once + graph_task* last_task = std::get<N-1>(p).try_put_task(std::get<N-1>(t)); + check_task_and_spawn(g, last_task); + return emit_element<N-1>::emit_this(g,t,p); + } +}; + +template<> +struct emit_element<1> { + template<typename T, typename P> + static graph_task* emit_this(graph& g, const T &t, P &p) { + graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t)); + check_task_and_spawn(g, last_task); + return SUCCESSFULLY_ENQUEUED; + } +}; + +//! Implements methods for an executable node that takes continue_msg as input +template< typename Output, typename Policy> +class continue_input : public continue_receiver { +public: + + //! The input type of this receiver + typedef continue_msg input_type; + + //! 
The output type of this receiver + typedef Output output_type; + typedef function_body<input_type, output_type> function_body_type; + typedef continue_input<output_type, Policy> class_type; + + template< typename Body > + continue_input( graph &g, Body& body, node_priority_t a_priority ) + : continue_receiver(/*number_of_predecessors=*/0, a_priority) + , my_graph_ref(g) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) + { } + + template< typename Body > + continue_input( graph &g, int number_of_predecessors, + Body& body, node_priority_t a_priority ) + : continue_receiver( number_of_predecessors, a_priority ) + , my_graph_ref(g) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) + { } + + continue_input( const continue_input& src ) : continue_receiver(src), + my_graph_ref(src.my_graph_ref), + my_body( src.my_init_body->clone() ), + my_init_body( src.my_init_body->clone() ) {} + + ~continue_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + function_body_type &body_ref = *my_body; + return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); + } + + void reset_receiver( reset_flags f) override { + continue_receiver::reset_receiver(f); + if(f & rf_reset_bodies) { + function_body_type *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + +protected: + + graph& my_graph_ref; + function_body_type *my_body; + function_body_type *my_init_body; + + virtual broadcast_cache<output_type > &successors() = 0; + + friend class apply_body_task_bypass< class_type, continue_msg >; + + //! Applies the body to the provided input + graph_task* apply_body_bypass( input_type ) { + // There is an extra copied needed to capture the + // body execution without the try_put + fgt_begin_body( my_body ); + output_type v = (*my_body)( continue_msg() ); + fgt_end_body( my_body ); + return successors().try_put_task( v ); + } + + graph_task* execute() override { + if(!is_graph_active(my_graph_ref)) { + return NULL; + } +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (push) +#pragma warning (disable: 4127) /* suppress conditional expression is constant */ +#endif + if(has_policy<lightweight, Policy>::value) { +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (pop) +#endif + return apply_body_bypass( continue_msg() ); + } + else { + small_object_allocator allocator{}; + typedef apply_body_task_bypass<class_type, continue_msg> task_type; + graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority ); + graph_reference().reserve_wait(); + return t; + } + } + + graph& graph_reference() const override { + return my_graph_ref; + } +}; // continue_input + +//! Implements methods for both executable and function nodes that puts Output to its successors +template< typename Output > +class function_output : public sender<Output> { +public: + + template<int N> friend struct clear_element; + typedef Output output_type; + typedef typename sender<output_type>::successor_type successor_type; + typedef broadcast_cache<output_type> broadcast_cache_type; + + function_output(graph& g) : my_successors(this), my_graph_ref(g) {} + function_output(const function_output& other) = delete; + + //! 
Adds a new successor to this node + bool register_successor( successor_type &r ) override { + successors().register_successor( r ); + return true; + } + + //! Removes a successor from this node + bool remove_successor( successor_type &r ) override { + successors().remove_successor( r ); + return true; + } + + broadcast_cache_type &successors() { return my_successors; } + + graph& graph_reference() const { return my_graph_ref; } +protected: + broadcast_cache_type my_successors; + graph& my_graph_ref; +}; // function_output + +template< typename Output > +class multifunction_output : public function_output<Output> { +public: + typedef Output output_type; + typedef function_output<output_type> base_type; + using base_type::my_successors; + + multifunction_output(graph& g) : base_type(g) {} + multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {} + + bool try_put(const output_type &i) { + graph_task *res = try_put_task(i); + if( !res ) return false; + if( res != SUCCESSFULLY_ENQUEUED ) { + // wrapping in task_arena::execute() is not needed since the method is called from + // inside task::execute() + spawn_in_graph_arena(graph_reference(), *res); + } + return true; + } + + using base_type::graph_reference; + +protected: + + graph_task* try_put_task(const output_type &i) { + return my_successors.try_put_task(i); + } + + template <int N> friend struct emit_element; + +}; // multifunction_output + +//composite_node +template<typename CompositeType> +void add_nodes_impl(CompositeType*, bool) {} + +template< typename CompositeType, typename NodeType1, typename... NodeTypes > +void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) { + void *addr = const_cast<NodeType1 *>(&n1); + + fgt_alias_port(c_node, addr, visible); + add_nodes_impl(c_node, visible, n...); +} + +#endif // __TBB__flow_graph_node_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h index ce867121f9..24f720f816 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h @@ -1,265 +1,265 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_flow_graph_node_set_impl_H -#define __TBB_flow_graph_node_set_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. 
-#endif - -// Included in namespace tbb::detail::d1 (in flow_graph.h) - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -// Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get -// Seems like the well-formed expression in trailing decltype is treated as ill-formed -// TODO: investigate problems with decltype in trailing return types or find the cross-platform solution -#define __TBB_MSVC_DISABLE_TRAILING_DECLTYPE (_MSC_VER >= 1900) - -namespace order { -struct undefined {}; -struct following {}; -struct preceding {}; -} - -class get_graph_helper { -public: - // TODO: consider making graph_reference() public and consistent interface to get a reference to the graph - // and remove get_graph_helper - template <typename T> - static graph& get(const T& object) { - return get_impl(object, std::is_base_of<graph_node, T>()); - } - -private: - // Get graph from the object of type derived from graph_node - template <typename T> - static graph& get_impl(const T& object, std::true_type) { - return static_cast<const graph_node*>(&object)->my_graph; - } - - template <typename T> - static graph& get_impl(const T& object, std::false_type) { - return object.graph_reference(); - } -}; - -template<typename Order, typename... Nodes> -struct node_set { - typedef Order order_type; - - std::tuple<Nodes&...> nodes; - node_set(Nodes&... ns) : nodes(ns...) {} - - template <typename... Nodes2> - node_set(const node_set<order::undefined, Nodes2...>& set) : nodes(set.nodes) {} - - graph& graph_reference() const { - return get_graph_helper::get(std::get<0>(nodes)); - } -}; - -namespace alias_helpers { -template <typename T> using output_type = typename T::output_type; -template <typename T> using output_ports_type = typename T::output_ports_type; -template <typename T> using input_type = typename T::input_type; -template <typename T> using input_ports_type = typename T::input_ports_type; -} // namespace alias_helpers - -template <typename T> -using has_output_type = supports<T, alias_helpers::output_type>; - -template <typename T> -using has_input_type = supports<T, alias_helpers::input_type>; - -template <typename T> -using has_input_ports_type = supports<T, alias_helpers::input_ports_type>; - -template <typename T> -using has_output_ports_type = supports<T, alias_helpers::output_ports_type>; - -template<typename T> -struct is_sender : std::is_base_of<sender<typename T::output_type>, T> {}; - -template<typename T> -struct is_receiver : std::is_base_of<receiver<typename T::input_type>, T> {}; - -template <typename Node> -struct is_async_node : std::false_type {}; - -template <typename... Args> -struct is_async_node<async_node<Args...>> : std::true_type {}; - -template<typename FirstPredecessor, typename... Predecessors> -node_set<order::following, FirstPredecessor, Predecessors...> -follows(FirstPredecessor& first_predecessor, Predecessors&... predecessors) { - static_assert((conjunction<has_output_type<FirstPredecessor>, - has_output_type<Predecessors>...>::value), - "Not all node's predecessors has output_type typedef"); - static_assert((conjunction<is_sender<FirstPredecessor>, is_sender<Predecessors>...>::value), - "Not all node's predecessors are senders"); - return node_set<order::following, FirstPredecessor, Predecessors...>(first_predecessor, predecessors...); -} - -template<typename... 
Predecessors> -node_set<order::following, Predecessors...> -follows(node_set<order::undefined, Predecessors...>& predecessors_set) { - static_assert((conjunction<has_output_type<Predecessors>...>::value), - "Not all nodes in the set has output_type typedef"); - static_assert((conjunction<is_sender<Predecessors>...>::value), - "Not all nodes in the set are senders"); - return node_set<order::following, Predecessors...>(predecessors_set); -} - -template<typename FirstSuccessor, typename... Successors> -node_set<order::preceding, FirstSuccessor, Successors...> -precedes(FirstSuccessor& first_successor, Successors&... successors) { - static_assert((conjunction<has_input_type<FirstSuccessor>, - has_input_type<Successors>...>::value), - "Not all node's successors has input_type typedef"); - static_assert((conjunction<is_receiver<FirstSuccessor>, is_receiver<Successors>...>::value), - "Not all node's successors are receivers"); - return node_set<order::preceding, FirstSuccessor, Successors...>(first_successor, successors...); -} - -template<typename... Successors> -node_set<order::preceding, Successors...> -precedes(node_set<order::undefined, Successors...>& successors_set) { - static_assert((conjunction<has_input_type<Successors>...>::value), - "Not all nodes in the set has input_type typedef"); - static_assert((conjunction<is_receiver<Successors>...>::value), - "Not all nodes in the set are receivers"); - return node_set<order::preceding, Successors...>(successors_set); -} - -template <typename Node, typename... Nodes> -node_set<order::undefined, Node, Nodes...> -make_node_set(Node& first_node, Nodes&... nodes) { - return node_set<order::undefined, Node, Nodes...>(first_node, nodes...); -} - -template<size_t I> -class successor_selector { - template <typename NodeType> - static auto get_impl(NodeType& node, std::true_type) -> decltype(input_port<I>(node)) { - return input_port<I>(node); - } - - template <typename NodeType> - static NodeType& get_impl(NodeType& node, std::false_type) { return node; } - -public: - template <typename NodeType> -#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE - static auto& get(NodeType& node) -#else - static auto get(NodeType& node) -> decltype(get_impl(node, has_input_ports_type<NodeType>())) -#endif - { - return get_impl(node, has_input_ports_type<NodeType>()); - } -}; - -template<size_t I> -class predecessor_selector { - template <typename NodeType> - static auto internal_get(NodeType& node, std::true_type) -> decltype(output_port<I>(node)) { - return output_port<I>(node); - } - - template <typename NodeType> - static NodeType& internal_get(NodeType& node, std::false_type) { return node;} - - template <typename NodeType> -#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE - static auto& get_impl(NodeType& node, std::false_type) -#else - static auto get_impl(NodeType& node, std::false_type) -> decltype(internal_get(node, has_output_ports_type<NodeType>())) -#endif - { - return internal_get(node, has_output_ports_type<NodeType>()); - } - - template <typename AsyncNode> - static AsyncNode& get_impl(AsyncNode& node, std::true_type) { return node; } - -public: - template <typename NodeType> -#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE - static auto& get(NodeType& node) -#else - static auto get(NodeType& node) -> decltype(get_impl(node, is_async_node<NodeType>())) -#endif - { - return get_impl(node, is_async_node<NodeType>()); - } -}; - -template<size_t I> -class make_edges_helper { -public: - template<typename PredecessorsTuple, typename NodeType> - static void 
connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { - make_edge(std::get<I>(predecessors), successor_selector<I>::get(node)); - make_edges_helper<I - 1>::connect_predecessors(predecessors, node); - } - - template<typename SuccessorsTuple, typename NodeType> - static void connect_successors(NodeType& node, SuccessorsTuple& successors) { - make_edge(predecessor_selector<I>::get(node), std::get<I>(successors)); - make_edges_helper<I - 1>::connect_successors(node, successors); - } -}; - -template<> -struct make_edges_helper<0> { - template<typename PredecessorsTuple, typename NodeType> - static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { - make_edge(std::get<0>(predecessors), successor_selector<0>::get(node)); - } - - template<typename SuccessorsTuple, typename NodeType> - static void connect_successors(NodeType& node, SuccessorsTuple& successors) { - make_edge(predecessor_selector<0>::get(node), std::get<0>(successors)); - } -}; - -// TODO: consider adding an overload for making edges between node sets -template<typename NodeType, typename OrderFlagType, typename... Args> -void make_edges(const node_set<OrderFlagType, Args...>& s, NodeType& node) { - const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; - make_edges_helper<SetSize - 1>::connect_predecessors(s.nodes, node); -} - -template <typename NodeType, typename OrderFlagType, typename... Args> -void make_edges(NodeType& node, const node_set<OrderFlagType, Args...>& s) { - const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; - make_edges_helper<SetSize - 1>::connect_successors(node, s.nodes); -} - -template <typename NodeType, typename... Nodes> -void make_edges_in_order(const node_set<order::following, Nodes...>& ns, NodeType& node) { - make_edges(ns, node); -} - -template <typename NodeType, typename... Nodes> -void make_edges_in_order(const node_set<order::preceding, Nodes...>& ns, NodeType& node) { - make_edges(node, ns); -} - -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -#endif // __TBB_flow_graph_node_set_impl_H +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_node_set_impl_H +#define __TBB_flow_graph_node_set_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. 
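The follows/precedes/make_node_set helpers and the make_edges machinery defined in this header back the preview interface for wiring flow-graph edges at node-construction time. A minimal usage sketch follows, assuming the preview macro TBB_PREVIEW_FLOW_GRAPH_NODE_SET and the public tbb::flow node types referenced elsewhere in this diff; exact constructor overloads should be checked against these headers rather than taken from the sketch.

// Sketch only: hedged illustration of the preview node-set interface.
#define TBB_PREVIEW_FLOW_GRAPH_NODE_SET 1
#include <oneapi/tbb/flow_graph.h>

int main() {
    using namespace tbb::flow;
    graph g;
    broadcast_node<int> src(g);
    function_node<int, int> twice(g, unlimited, [](int v) { return 2 * v; });
    // follows(src, twice) builds a node_set<order::following, ...>; handing it to a
    // node constructor lets the node locate its graph (get_graph_helper) and create
    // the edges via make_edges_in_order, so no explicit make_edge calls are needed.
    function_node<int, int> sink(follows(src, twice), serial, [](int v) { return v; });
    src.try_put(21);
    g.wait_for_all();
    return 0;
}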
+#endif + +// Included in namespace tbb::detail::d1 (in flow_graph.h) + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +// Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get +// Seems like the well-formed expression in trailing decltype is treated as ill-formed +// TODO: investigate problems with decltype in trailing return types or find the cross-platform solution +#define __TBB_MSVC_DISABLE_TRAILING_DECLTYPE (_MSC_VER >= 1900) + +namespace order { +struct undefined {}; +struct following {}; +struct preceding {}; +} + +class get_graph_helper { +public: + // TODO: consider making graph_reference() public and consistent interface to get a reference to the graph + // and remove get_graph_helper + template <typename T> + static graph& get(const T& object) { + return get_impl(object, std::is_base_of<graph_node, T>()); + } + +private: + // Get graph from the object of type derived from graph_node + template <typename T> + static graph& get_impl(const T& object, std::true_type) { + return static_cast<const graph_node*>(&object)->my_graph; + } + + template <typename T> + static graph& get_impl(const T& object, std::false_type) { + return object.graph_reference(); + } +}; + +template<typename Order, typename... Nodes> +struct node_set { + typedef Order order_type; + + std::tuple<Nodes&...> nodes; + node_set(Nodes&... ns) : nodes(ns...) {} + + template <typename... Nodes2> + node_set(const node_set<order::undefined, Nodes2...>& set) : nodes(set.nodes) {} + + graph& graph_reference() const { + return get_graph_helper::get(std::get<0>(nodes)); + } +}; + +namespace alias_helpers { +template <typename T> using output_type = typename T::output_type; +template <typename T> using output_ports_type = typename T::output_ports_type; +template <typename T> using input_type = typename T::input_type; +template <typename T> using input_ports_type = typename T::input_ports_type; +} // namespace alias_helpers + +template <typename T> +using has_output_type = supports<T, alias_helpers::output_type>; + +template <typename T> +using has_input_type = supports<T, alias_helpers::input_type>; + +template <typename T> +using has_input_ports_type = supports<T, alias_helpers::input_ports_type>; + +template <typename T> +using has_output_ports_type = supports<T, alias_helpers::output_ports_type>; + +template<typename T> +struct is_sender : std::is_base_of<sender<typename T::output_type>, T> {}; + +template<typename T> +struct is_receiver : std::is_base_of<receiver<typename T::input_type>, T> {}; + +template <typename Node> +struct is_async_node : std::false_type {}; + +template <typename... Args> +struct is_async_node<async_node<Args...>> : std::true_type {}; + +template<typename FirstPredecessor, typename... Predecessors> +node_set<order::following, FirstPredecessor, Predecessors...> +follows(FirstPredecessor& first_predecessor, Predecessors&... predecessors) { + static_assert((conjunction<has_output_type<FirstPredecessor>, + has_output_type<Predecessors>...>::value), + "Not all node's predecessors has output_type typedef"); + static_assert((conjunction<is_sender<FirstPredecessor>, is_sender<Predecessors>...>::value), + "Not all node's predecessors are senders"); + return node_set<order::following, FirstPredecessor, Predecessors...>(first_predecessor, predecessors...); +} + +template<typename... 
Predecessors> +node_set<order::following, Predecessors...> +follows(node_set<order::undefined, Predecessors...>& predecessors_set) { + static_assert((conjunction<has_output_type<Predecessors>...>::value), + "Not all nodes in the set has output_type typedef"); + static_assert((conjunction<is_sender<Predecessors>...>::value), + "Not all nodes in the set are senders"); + return node_set<order::following, Predecessors...>(predecessors_set); +} + +template<typename FirstSuccessor, typename... Successors> +node_set<order::preceding, FirstSuccessor, Successors...> +precedes(FirstSuccessor& first_successor, Successors&... successors) { + static_assert((conjunction<has_input_type<FirstSuccessor>, + has_input_type<Successors>...>::value), + "Not all node's successors has input_type typedef"); + static_assert((conjunction<is_receiver<FirstSuccessor>, is_receiver<Successors>...>::value), + "Not all node's successors are receivers"); + return node_set<order::preceding, FirstSuccessor, Successors...>(first_successor, successors...); +} + +template<typename... Successors> +node_set<order::preceding, Successors...> +precedes(node_set<order::undefined, Successors...>& successors_set) { + static_assert((conjunction<has_input_type<Successors>...>::value), + "Not all nodes in the set has input_type typedef"); + static_assert((conjunction<is_receiver<Successors>...>::value), + "Not all nodes in the set are receivers"); + return node_set<order::preceding, Successors...>(successors_set); +} + +template <typename Node, typename... Nodes> +node_set<order::undefined, Node, Nodes...> +make_node_set(Node& first_node, Nodes&... nodes) { + return node_set<order::undefined, Node, Nodes...>(first_node, nodes...); +} + +template<size_t I> +class successor_selector { + template <typename NodeType> + static auto get_impl(NodeType& node, std::true_type) -> decltype(input_port<I>(node)) { + return input_port<I>(node); + } + + template <typename NodeType> + static NodeType& get_impl(NodeType& node, std::false_type) { return node; } + +public: + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get(NodeType& node) +#else + static auto get(NodeType& node) -> decltype(get_impl(node, has_input_ports_type<NodeType>())) +#endif + { + return get_impl(node, has_input_ports_type<NodeType>()); + } +}; + +template<size_t I> +class predecessor_selector { + template <typename NodeType> + static auto internal_get(NodeType& node, std::true_type) -> decltype(output_port<I>(node)) { + return output_port<I>(node); + } + + template <typename NodeType> + static NodeType& internal_get(NodeType& node, std::false_type) { return node;} + + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get_impl(NodeType& node, std::false_type) +#else + static auto get_impl(NodeType& node, std::false_type) -> decltype(internal_get(node, has_output_ports_type<NodeType>())) +#endif + { + return internal_get(node, has_output_ports_type<NodeType>()); + } + + template <typename AsyncNode> + static AsyncNode& get_impl(AsyncNode& node, std::true_type) { return node; } + +public: + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get(NodeType& node) +#else + static auto get(NodeType& node) -> decltype(get_impl(node, is_async_node<NodeType>())) +#endif + { + return get_impl(node, is_async_node<NodeType>()); + } +}; + +template<size_t I> +class make_edges_helper { +public: + template<typename PredecessorsTuple, typename NodeType> + static void 
connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { + make_edge(std::get<I>(predecessors), successor_selector<I>::get(node)); + make_edges_helper<I - 1>::connect_predecessors(predecessors, node); + } + + template<typename SuccessorsTuple, typename NodeType> + static void connect_successors(NodeType& node, SuccessorsTuple& successors) { + make_edge(predecessor_selector<I>::get(node), std::get<I>(successors)); + make_edges_helper<I - 1>::connect_successors(node, successors); + } +}; + +template<> +struct make_edges_helper<0> { + template<typename PredecessorsTuple, typename NodeType> + static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { + make_edge(std::get<0>(predecessors), successor_selector<0>::get(node)); + } + + template<typename SuccessorsTuple, typename NodeType> + static void connect_successors(NodeType& node, SuccessorsTuple& successors) { + make_edge(predecessor_selector<0>::get(node), std::get<0>(successors)); + } +}; + +// TODO: consider adding an overload for making edges between node sets +template<typename NodeType, typename OrderFlagType, typename... Args> +void make_edges(const node_set<OrderFlagType, Args...>& s, NodeType& node) { + const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; + make_edges_helper<SetSize - 1>::connect_predecessors(s.nodes, node); +} + +template <typename NodeType, typename OrderFlagType, typename... Args> +void make_edges(NodeType& node, const node_set<OrderFlagType, Args...>& s) { + const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; + make_edges_helper<SetSize - 1>::connect_successors(node, s.nodes); +} + +template <typename NodeType, typename... Nodes> +void make_edges_in_order(const node_set<order::following, Nodes...>& ns, NodeType& node) { + make_edges(ns, node); +} + +template <typename NodeType, typename... Nodes> +void make_edges_in_order(const node_set<order::preceding, Nodes...>& ns, NodeType& node) { + make_edges(node, ns); +} + +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +#endif // __TBB_flow_graph_node_set_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h index 8c20993795..7b325f0cf7 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h @@ -1,277 +1,277 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_flow_graph_nodes_deduction_H -#define __TBB_flow_graph_nodes_deduction_H - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Input, typename Output> -struct declare_body_types { - using input_type = Input; - using output_type = Output; -}; - -struct NoInputBody {}; - -template <typename Output> -struct declare_body_types<NoInputBody, Output> { - using output_type = Output; -}; - -template <typename T> struct body_types; - -template <typename T, typename Input, typename Output> -struct body_types<Output (T::*)(const Input&) const> : declare_body_types<Input, Output> {}; - -template <typename T, typename Input, typename Output> -struct body_types<Output (T::*)(const Input&)> : declare_body_types<Input, Output> {}; - -template <typename T, typename Input, typename Output> -struct body_types<Output (T::*)(Input&) const> : declare_body_types<Input, Output> {}; - -template <typename T, typename Input, typename Output> -struct body_types<Output (T::*)(Input&)> : declare_body_types<Input, Output> {}; - -template <typename T, typename Output> -struct body_types<Output (T::*)(flow_control&) const> : declare_body_types<NoInputBody, Output> {}; - -template <typename T, typename Output> -struct body_types<Output (T::*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; - -template <typename Input, typename Output> -struct body_types<Output (*)(Input&)> : declare_body_types<Input, Output> {}; - -template <typename Input, typename Output> -struct body_types<Output (*)(const Input&)> : declare_body_types<Input, Output> {}; - -template <typename Output> -struct body_types<Output (*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; - -template <typename Body> -using input_t = typename body_types<Body>::input_type; - -template <typename Body> -using output_t = typename body_types<Body>::output_type; - -template <typename T, typename Input, typename Output> -auto decide_on_operator_overload(Output (T::*name)(const Input&) const)->decltype(name); - -template <typename T, typename Input, typename Output> -auto decide_on_operator_overload(Output (T::*name)(const Input&))->decltype(name); - -template <typename T, typename Input, typename Output> -auto decide_on_operator_overload(Output (T::*name)(Input&) const)->decltype(name); - -template <typename T, typename Input, typename Output> -auto decide_on_operator_overload(Output (T::*name)(Input&))->decltype(name); - -template <typename Input, typename Output> -auto decide_on_operator_overload(Output (*name)(const Input&))->decltype(name); - -template <typename Input, typename Output> -auto decide_on_operator_overload(Output (*name)(Input&))->decltype(name); - -template <typename Body> -decltype(decide_on_operator_overload(&Body::operator())) decide_on_callable_type(int); - -template <typename Body> -decltype(decide_on_operator_overload(std::declval<Body>())) decide_on_callable_type(...); - -// Deduction guides for Flow Graph nodes - -template <typename GraphOrSet, typename Body> -input_node(GraphOrSet&&, Body) -->input_node<output_t<decltype(decide_on_callable_type<Body>(0))>>; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -template <typename NodeSet> -struct decide_on_set; - -template <typename Node, typename... Nodes> -struct decide_on_set<node_set<order::following, Node, Nodes...>> { - using type = typename Node::output_type; -}; - -template <typename Node, typename... 
Nodes> -struct decide_on_set<node_set<order::preceding, Node, Nodes...>> { - using type = typename Node::input_type; -}; - -template <typename NodeSet> -using decide_on_set_t = typename decide_on_set<std::decay_t<NodeSet>>::type; - -template <typename NodeSet> -broadcast_node(const NodeSet&) -->broadcast_node<decide_on_set_t<NodeSet>>; - -template <typename NodeSet> -buffer_node(const NodeSet&) -->buffer_node<decide_on_set_t<NodeSet>>; - -template <typename NodeSet> -queue_node(const NodeSet&) -->queue_node<decide_on_set_t<NodeSet>>; -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -template <typename GraphOrProxy, typename Sequencer> -sequencer_node(GraphOrProxy&&, Sequencer) -->sequencer_node<input_t<decltype(decide_on_callable_type<Sequencer>(0))>>; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -template <typename NodeSet, typename Compare> -priority_queue_node(const NodeSet&, const Compare&) -->priority_queue_node<decide_on_set_t<NodeSet>, Compare>; - -template <typename NodeSet> -priority_queue_node(const NodeSet&) -->priority_queue_node<decide_on_set_t<NodeSet>, std::less<decide_on_set_t<NodeSet>>>; -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -template <typename Key> -struct join_key { - using type = Key; -}; - -template <typename T> -struct join_key<const T&> { - using type = T&; -}; - -template <typename Key> -using join_key_t = typename join_key<Key>::type; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -template <typename Policy, typename... Predecessors> -join_node(const node_set<order::following, Predecessors...>&, Policy) -->join_node<std::tuple<typename Predecessors::output_type...>, - Policy>; - -template <typename Policy, typename Successor, typename... Successors> -join_node(const node_set<order::preceding, Successor, Successors...>&, Policy) -->join_node<typename Successor::input_type, Policy>; - -template <typename... Predecessors> -join_node(const node_set<order::following, Predecessors...>) -->join_node<std::tuple<typename Predecessors::output_type...>, - queueing>; - -template <typename Successor, typename... Successors> -join_node(const node_set<order::preceding, Successor, Successors...>) -->join_node<typename Successor::input_type, queueing>; -#endif - -template <typename GraphOrProxy, typename Body, typename... Bodies> -join_node(GraphOrProxy&&, Body, Bodies...) -->join_node<std::tuple<input_t<decltype(decide_on_callable_type<Body>(0))>, - input_t<decltype(decide_on_callable_type<Bodies>(0))>...>, - key_matching<join_key_t<output_t<decltype(decide_on_callable_type<Body>(0))>>>>; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -template <typename... Predecessors> -indexer_node(const node_set<order::following, Predecessors...>&) -->indexer_node<typename Predecessors::output_type...>; -#endif - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -template <typename NodeSet> -limiter_node(const NodeSet&, size_t) -->limiter_node<decide_on_set_t<NodeSet>>; - -template <typename Predecessor, typename... Predecessors> -split_node(const node_set<order::following, Predecessor, Predecessors...>&) -->split_node<typename Predecessor::output_type>; - -template <typename... 
Successors> -split_node(const node_set<order::preceding, Successors...>&) -->split_node<std::tuple<typename Successors::input_type...>>; - -#endif - -template <typename GraphOrSet, typename Body, typename Policy> -function_node(GraphOrSet&&, - size_t, Body, - Policy, node_priority_t = no_priority) -->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, - output_t<decltype(decide_on_callable_type<Body>(0))>, - Policy>; - -template <typename GraphOrSet, typename Body> -function_node(GraphOrSet&&, size_t, - Body, node_priority_t = no_priority) -->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, - output_t<decltype(decide_on_callable_type<Body>(0))>, - queueing>; - -template <typename Output> -struct continue_output { - using type = Output; -}; - -template <> -struct continue_output<void> { - using type = continue_msg; -}; - -template <typename T> -using continue_output_t = typename continue_output<T>::type; - -template <typename GraphOrSet, typename Body, typename Policy> -continue_node(GraphOrSet&&, Body, - Policy, node_priority_t = no_priority) -->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, - Policy>; - -template <typename GraphOrSet, typename Body, typename Policy> -continue_node(GraphOrSet&&, - int, Body, - Policy, node_priority_t = no_priority) -->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, - Policy>; - -template <typename GraphOrSet, typename Body> -continue_node(GraphOrSet&&, - Body, node_priority_t = no_priority) -->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, Policy<void>>; - -template <typename GraphOrSet, typename Body> -continue_node(GraphOrSet&&, int, - Body, node_priority_t = no_priority) -->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, - Policy<void>>; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -template <typename NodeSet> -overwrite_node(const NodeSet&) -->overwrite_node<decide_on_set_t<NodeSet>>; - -template <typename NodeSet> -write_once_node(const NodeSet&) -->write_once_node<decide_on_set_t<NodeSet>>; -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -#endif // __TBB_flow_graph_nodes_deduction_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
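The removed half of _flow_graph_nodes_deduction.h above defines C++17 deduction guides that infer a node's template arguments from the callable handed to its constructor (via body_types and decide_on_callable_type). A hedged sketch of what that enables for a user, assuming a C++17 compiler so __TBB_CPP17_DEDUCTION_GUIDES_PRESENT is set:

// Sketch: class template argument deduction for a flow-graph node.
#include <oneapi/tbb/flow_graph.h>

int main() {
    tbb::flow::graph g;
    // body_types<> inspects the lambda's call operator: Input = int, Output = double,
    // so this declaration deduces tbb::flow::function_node<int, double> with the
    // default queueing policy per the guides above.
    tbb::flow::function_node squarer(g, tbb::flow::unlimited,
                                     [](const int& v) -> double { return double(v) * v; });
    squarer.try_put(3);
    g.wait_for_all();
    return 0;
}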
+*/ + +#ifndef __TBB_flow_graph_nodes_deduction_H +#define __TBB_flow_graph_nodes_deduction_H + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Input, typename Output> +struct declare_body_types { + using input_type = Input; + using output_type = Output; +}; + +struct NoInputBody {}; + +template <typename Output> +struct declare_body_types<NoInputBody, Output> { + using output_type = Output; +}; + +template <typename T> struct body_types; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(const Input&) const> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(const Input&)> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(Input&) const> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(Input&)> : declare_body_types<Input, Output> {}; + +template <typename T, typename Output> +struct body_types<Output (T::*)(flow_control&) const> : declare_body_types<NoInputBody, Output> {}; + +template <typename T, typename Output> +struct body_types<Output (T::*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; + +template <typename Input, typename Output> +struct body_types<Output (*)(Input&)> : declare_body_types<Input, Output> {}; + +template <typename Input, typename Output> +struct body_types<Output (*)(const Input&)> : declare_body_types<Input, Output> {}; + +template <typename Output> +struct body_types<Output (*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; + +template <typename Body> +using input_t = typename body_types<Body>::input_type; + +template <typename Body> +using output_t = typename body_types<Body>::output_type; + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(const Input&) const)->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(const Input&))->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(Input&) const)->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(Input&))->decltype(name); + +template <typename Input, typename Output> +auto decide_on_operator_overload(Output (*name)(const Input&))->decltype(name); + +template <typename Input, typename Output> +auto decide_on_operator_overload(Output (*name)(Input&))->decltype(name); + +template <typename Body> +decltype(decide_on_operator_overload(&Body::operator())) decide_on_callable_type(int); + +template <typename Body> +decltype(decide_on_operator_overload(std::declval<Body>())) decide_on_callable_type(...); + +// Deduction guides for Flow Graph nodes + +template <typename GraphOrSet, typename Body> +input_node(GraphOrSet&&, Body) +->input_node<output_t<decltype(decide_on_callable_type<Body>(0))>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename NodeSet> +struct decide_on_set; + +template <typename Node, typename... Nodes> +struct decide_on_set<node_set<order::following, Node, Nodes...>> { + using type = typename Node::output_type; +}; + +template <typename Node, typename... 
Nodes> +struct decide_on_set<node_set<order::preceding, Node, Nodes...>> { + using type = typename Node::input_type; +}; + +template <typename NodeSet> +using decide_on_set_t = typename decide_on_set<std::decay_t<NodeSet>>::type; + +template <typename NodeSet> +broadcast_node(const NodeSet&) +->broadcast_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +buffer_node(const NodeSet&) +->buffer_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +queue_node(const NodeSet&) +->queue_node<decide_on_set_t<NodeSet>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename GraphOrProxy, typename Sequencer> +sequencer_node(GraphOrProxy&&, Sequencer) +->sequencer_node<input_t<decltype(decide_on_callable_type<Sequencer>(0))>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename NodeSet, typename Compare> +priority_queue_node(const NodeSet&, const Compare&) +->priority_queue_node<decide_on_set_t<NodeSet>, Compare>; + +template <typename NodeSet> +priority_queue_node(const NodeSet&) +->priority_queue_node<decide_on_set_t<NodeSet>, std::less<decide_on_set_t<NodeSet>>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename Key> +struct join_key { + using type = Key; +}; + +template <typename T> +struct join_key<const T&> { + using type = T&; +}; + +template <typename Key> +using join_key_t = typename join_key<Key>::type; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename Policy, typename... Predecessors> +join_node(const node_set<order::following, Predecessors...>&, Policy) +->join_node<std::tuple<typename Predecessors::output_type...>, + Policy>; + +template <typename Policy, typename Successor, typename... Successors> +join_node(const node_set<order::preceding, Successor, Successors...>&, Policy) +->join_node<typename Successor::input_type, Policy>; + +template <typename... Predecessors> +join_node(const node_set<order::following, Predecessors...>) +->join_node<std::tuple<typename Predecessors::output_type...>, + queueing>; + +template <typename Successor, typename... Successors> +join_node(const node_set<order::preceding, Successor, Successors...>) +->join_node<typename Successor::input_type, queueing>; +#endif + +template <typename GraphOrProxy, typename Body, typename... Bodies> +join_node(GraphOrProxy&&, Body, Bodies...) +->join_node<std::tuple<input_t<decltype(decide_on_callable_type<Body>(0))>, + input_t<decltype(decide_on_callable_type<Bodies>(0))>...>, + key_matching<join_key_t<output_t<decltype(decide_on_callable_type<Body>(0))>>>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename... Predecessors> +indexer_node(const node_set<order::following, Predecessors...>&) +->indexer_node<typename Predecessors::output_type...>; +#endif + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename NodeSet> +limiter_node(const NodeSet&, size_t) +->limiter_node<decide_on_set_t<NodeSet>>; + +template <typename Predecessor, typename... Predecessors> +split_node(const node_set<order::following, Predecessor, Predecessors...>&) +->split_node<typename Predecessor::output_type>; + +template <typename... 
Successors> +split_node(const node_set<order::preceding, Successors...>&) +->split_node<std::tuple<typename Successors::input_type...>>; + +#endif + +template <typename GraphOrSet, typename Body, typename Policy> +function_node(GraphOrSet&&, + size_t, Body, + Policy, node_priority_t = no_priority) +->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, + output_t<decltype(decide_on_callable_type<Body>(0))>, + Policy>; + +template <typename GraphOrSet, typename Body> +function_node(GraphOrSet&&, size_t, + Body, node_priority_t = no_priority) +->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, + output_t<decltype(decide_on_callable_type<Body>(0))>, + queueing>; + +template <typename Output> +struct continue_output { + using type = Output; +}; + +template <> +struct continue_output<void> { + using type = continue_msg; +}; + +template <typename T> +using continue_output_t = typename continue_output<T>::type; + +template <typename GraphOrSet, typename Body, typename Policy> +continue_node(GraphOrSet&&, Body, + Policy, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy>; + +template <typename GraphOrSet, typename Body, typename Policy> +continue_node(GraphOrSet&&, + int, Body, + Policy, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy>; + +template <typename GraphOrSet, typename Body> +continue_node(GraphOrSet&&, + Body, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, Policy<void>>; + +template <typename GraphOrSet, typename Body> +continue_node(GraphOrSet&&, int, + Body, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy<void>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename NodeSet> +overwrite_node(const NodeSet&) +->overwrite_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +write_once_node(const NodeSet&) +->write_once_node<decide_on_set_t<NodeSet>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +#endif // __TBB_flow_graph_nodes_deduction_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h index 0c4580a199..f9bc3d3369 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h @@ -1,256 +1,256 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// a hash table buffer that can expand, and can support as many deletions as -// additions, list-based, with elements of list held in array (for destruction -// management), multiplicative hashing (like ets). No synchronization built-in. 
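The comment above introduces hash_buffer, an expandable chained hash table with a free list and no built-in synchronization. One assumption worth flagging: this buffer appears to be the storage behind key-matching join ports, so the hedged sketch below shows that user-facing feature through the regular public join_node API; the lambdas play the ValueToKey role described in the template parameters that follow.

// Sketch: pairing messages by key with a key_matching join_node (public API).
#include <oneapi/tbb/flow_graph.h>
#include <string>
#include <tuple>

struct Reading { int sensor_id; double value; };
struct Label   { int sensor_id; std::string name; };

int main() {
    using namespace tbb::flow;
    graph g;
    join_node<std::tuple<Reading, Label>, key_matching<int>> pair_up(
        g,
        [](const Reading& r) { return r.sensor_id; },   // key functor for port 0
        [](const Label& l)   { return l.sensor_id; });  // key functor for port 1
    function_node<std::tuple<Reading, Label>, continue_msg> sink(
        g, serial, [](const std::tuple<Reading, Label>&) { return continue_msg(); });
    make_edge(pair_up, sink);
    input_port<0>(pair_up).try_put(Reading{7, 3.14});
    input_port<1>(pair_up).try_put(Label{7, "temperature"});
    g.wait_for_all();
    return 0;
}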
-// - -#ifndef __TBB__flow_graph_hash_buffer_impl_H -#define __TBB__flow_graph_hash_buffer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::flow::interfaceX::internal - -// elements in the table are a simple list; we need pointer to next element to -// traverse the chain -template<typename ValueType> -struct buffer_element_type { - // the second parameter below is void * because we can't forward-declare the type - // itself, so we just reinterpret_cast below. - typedef typename aligned_pair<ValueType, void *>::type type; -}; - -template - < - typename Key, // type of key within ValueType - typename ValueType, - typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType - typename HashCompare, // has hash and equal - typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair<ValueType, void *>::type > - > -class hash_buffer : public HashCompare { -public: - static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table - typedef ValueType value_type; - typedef typename buffer_element_type< value_type >::type element_type; - typedef value_type *pointer_type; - typedef element_type *list_array_type; // array we manage manually - typedef list_array_type *pointer_array_type; - typedef typename std::allocator_traits<Allocator>::template rebind_alloc<list_array_type> pointer_array_allocator_type; - typedef typename std::allocator_traits<Allocator>::template rebind_alloc<element_type> elements_array_allocator; - typedef typename std::decay<Key>::type Knoref; - -private: - ValueToKey *my_key; - size_t my_size; - size_t nelements; - pointer_array_type pointer_array; // pointer_array[my_size] - list_array_type elements_array; // elements_array[my_size / 2] - element_type* free_list; - - size_t mask() { return my_size - 1; } - - void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) { - for(size_t i=0; i < sz - 1; ++i ) { // construct free list - la[i].second = &(la[i+1]); - } - la[sz-1].second = NULL; - *p_free_list = (element_type *)&(la[0]); - } - - // cleanup for exceptions - struct DoCleanup { - pointer_array_type *my_pa; - list_array_type *my_elements; - size_t my_size; - - DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) : - my_pa(&pa), my_elements(&my_els), my_size(sz) { } - ~DoCleanup() { - if(my_pa) { - size_t dont_care = 0; - internal_free_buffer(*my_pa, *my_elements, my_size, dont_care); - } - } - }; - - // exception-safety requires we do all the potentially-throwing operations first - void grow_array() { - size_t new_size = my_size*2; - size_t new_nelements = nelements; // internal_free_buffer zeroes this - list_array_type new_elements_array = NULL; - pointer_array_type new_pointer_array = NULL; - list_array_type new_free_list = NULL; - { - DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size); - new_elements_array = elements_array_allocator().allocate(my_size); - new_pointer_array = pointer_array_allocator_type().allocate(new_size); - for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL; - set_up_free_list(&new_free_list, new_elements_array, my_size ); - - for(size_t i=0; i < my_size; ++i) { - for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) { - value_type *ov = reinterpret_cast<value_type *>(&(op->first)); - // could have std::move semantics - internal_insert_with_key(new_pointer_array, new_size, 
new_free_list, *ov); - } - } - my_cleanup.my_pa = NULL; - my_cleanup.my_elements = NULL; - } - - internal_free_buffer(pointer_array, elements_array, my_size, nelements); - free_list = new_free_list; - pointer_array = new_pointer_array; - elements_array = new_elements_array; - my_size = new_size; - nelements = new_nelements; - } - - // v should have perfect forwarding if std::move implemented. - // we use this method to move elements in grow_array, so can't use class fields - void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list, - const value_type &v) { - size_t l_mask = p_sz-1; - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - size_t h = this->hash((*my_key)(v)) & l_mask; - __TBB_ASSERT(p_free_list, "Error: free list not set up."); - element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second); - (void) new(&(my_elem->first)) value_type(v); - my_elem->second = p_pointer_array[h]; - p_pointer_array[h] = my_elem; - } - - void internal_initialize_buffer() { - pointer_array = pointer_array_allocator_type().allocate(my_size); - for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL; - elements_array = elements_array_allocator().allocate(my_size / 2); - set_up_free_list(&free_list, elements_array, my_size / 2); - } - - // made static so an enclosed class can use to properly dispose of the internals - static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) { - if(pa) { - for(size_t i = 0; i < sz; ++i ) { - element_type *p_next; - for( element_type *p = pa[i]; p; p = p_next) { - p_next = (element_type *)p->second; - // TODO revamp: make sure type casting is correct. - void* ptr = (void*)(p->first); -#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER - suppress_unused_warning(ptr); -#endif - ((value_type*)ptr)->~value_type(); - } - } - pointer_array_allocator_type().deallocate(pa, sz); - pa = NULL; - } - // Separate test (if allocation of pa throws, el may be allocated. - // but no elements will be constructed.) - if(el) { - elements_array_allocator().deallocate(el, sz / 2); - el = NULL; - } - sz = INITIAL_SIZE; - ne = 0; - } - -public: - hash_buffer() : my_key(NULL), my_size(INITIAL_SIZE), nelements(0) { - internal_initialize_buffer(); - } - - ~hash_buffer() { - internal_free_buffer(pointer_array, elements_array, my_size, nelements); - if(my_key) delete my_key; - } - hash_buffer(const hash_buffer&) = delete; - hash_buffer& operator=(const hash_buffer&) = delete; - - void reset() { - internal_free_buffer(pointer_array, elements_array, my_size, nelements); - internal_initialize_buffer(); - } - - // Take ownership of func object allocated with new. - // This method is only used internally, so can't be misused by user. - void set_key_func(ValueToKey *vtk) { my_key = vtk; } - // pointer is used to clone() - ValueToKey* get_key_func() { return my_key; } - - bool insert_with_key(const value_type &v) { - pointer_type p = NULL; - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(find_ref_with_key((*my_key)(v), p)) { - p->~value_type(); - (void) new(p) value_type(v); // copy-construct into the space - return false; - } - ++nelements; - if(nelements*2 > my_size) grow_array(); - internal_insert_with_key(pointer_array, my_size, free_list, v); - return true; - } - - // returns true and sets v to array element if found, else returns false. 
- bool find_ref_with_key(const Knoref& k, pointer_type &v) { - size_t i = this->hash(k) & mask(); - for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) { - pointer_type pv = reinterpret_cast<pointer_type>(&(p->first)); - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(this->equal((*my_key)(*pv), k)) { - v = pv; - return true; - } - } - return false; - } - - bool find_with_key( const Knoref& k, value_type &v) { - value_type *p; - if(find_ref_with_key(k, p)) { - v = *p; - return true; - } - else - return false; - } - - void delete_with_key(const Knoref& k) { - size_t h = this->hash(k) & mask(); - element_type* prev = NULL; - for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) { - value_type *vp = reinterpret_cast<value_type *>(&(p->first)); - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(this->equal((*my_key)(*vp), k)) { - vp->~value_type(); - if(prev) prev->second = p->second; - else pointer_array[h] = (element_type *)(p->second); - p->second = free_list; - free_list = p; - --nelements; - return; - } - } - __TBB_ASSERT(false, "key not found for delete"); - } -}; -#endif // __TBB__flow_graph_hash_buffer_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// a hash table buffer that can expand, and can support as many deletions as +// additions, list-based, with elements of list held in array (for destruction +// management), multiplicative hashing (like ets). No synchronization built-in. +// + +#ifndef __TBB__flow_graph_hash_buffer_impl_H +#define __TBB__flow_graph_hash_buffer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::flow::interfaceX::internal + +// elements in the table are a simple list; we need pointer to next element to +// traverse the chain +template<typename ValueType> +struct buffer_element_type { + // the second parameter below is void * because we can't forward-declare the type + // itself, so we just reinterpret_cast below. 
+ typedef typename aligned_pair<ValueType, void *>::type type; +}; + +template + < + typename Key, // type of key within ValueType + typename ValueType, + typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType + typename HashCompare, // has hash and equal + typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair<ValueType, void *>::type > + > +class hash_buffer : public HashCompare { +public: + static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table + typedef ValueType value_type; + typedef typename buffer_element_type< value_type >::type element_type; + typedef value_type *pointer_type; + typedef element_type *list_array_type; // array we manage manually + typedef list_array_type *pointer_array_type; + typedef typename std::allocator_traits<Allocator>::template rebind_alloc<list_array_type> pointer_array_allocator_type; + typedef typename std::allocator_traits<Allocator>::template rebind_alloc<element_type> elements_array_allocator; + typedef typename std::decay<Key>::type Knoref; + +private: + ValueToKey *my_key; + size_t my_size; + size_t nelements; + pointer_array_type pointer_array; // pointer_array[my_size] + list_array_type elements_array; // elements_array[my_size / 2] + element_type* free_list; + + size_t mask() { return my_size - 1; } + + void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) { + for(size_t i=0; i < sz - 1; ++i ) { // construct free list + la[i].second = &(la[i+1]); + } + la[sz-1].second = NULL; + *p_free_list = (element_type *)&(la[0]); + } + + // cleanup for exceptions + struct DoCleanup { + pointer_array_type *my_pa; + list_array_type *my_elements; + size_t my_size; + + DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) : + my_pa(&pa), my_elements(&my_els), my_size(sz) { } + ~DoCleanup() { + if(my_pa) { + size_t dont_care = 0; + internal_free_buffer(*my_pa, *my_elements, my_size, dont_care); + } + } + }; + + // exception-safety requires we do all the potentially-throwing operations first + void grow_array() { + size_t new_size = my_size*2; + size_t new_nelements = nelements; // internal_free_buffer zeroes this + list_array_type new_elements_array = NULL; + pointer_array_type new_pointer_array = NULL; + list_array_type new_free_list = NULL; + { + DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size); + new_elements_array = elements_array_allocator().allocate(my_size); + new_pointer_array = pointer_array_allocator_type().allocate(new_size); + for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL; + set_up_free_list(&new_free_list, new_elements_array, my_size ); + + for(size_t i=0; i < my_size; ++i) { + for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) { + value_type *ov = reinterpret_cast<value_type *>(&(op->first)); + // could have std::move semantics + internal_insert_with_key(new_pointer_array, new_size, new_free_list, *ov); + } + } + my_cleanup.my_pa = NULL; + my_cleanup.my_elements = NULL; + } + + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + free_list = new_free_list; + pointer_array = new_pointer_array; + elements_array = new_elements_array; + my_size = new_size; + nelements = new_nelements; + } + + // v should have perfect forwarding if std::move implemented. 
+ // we use this method to move elements in grow_array, so can't use class fields + void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list, + const value_type &v) { + size_t l_mask = p_sz-1; + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + size_t h = this->hash((*my_key)(v)) & l_mask; + __TBB_ASSERT(p_free_list, "Error: free list not set up."); + element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second); + (void) new(&(my_elem->first)) value_type(v); + my_elem->second = p_pointer_array[h]; + p_pointer_array[h] = my_elem; + } + + void internal_initialize_buffer() { + pointer_array = pointer_array_allocator_type().allocate(my_size); + for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL; + elements_array = elements_array_allocator().allocate(my_size / 2); + set_up_free_list(&free_list, elements_array, my_size / 2); + } + + // made static so an enclosed class can use to properly dispose of the internals + static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) { + if(pa) { + for(size_t i = 0; i < sz; ++i ) { + element_type *p_next; + for( element_type *p = pa[i]; p; p = p_next) { + p_next = (element_type *)p->second; + // TODO revamp: make sure type casting is correct. + void* ptr = (void*)(p->first); +#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER + suppress_unused_warning(ptr); +#endif + ((value_type*)ptr)->~value_type(); + } + } + pointer_array_allocator_type().deallocate(pa, sz); + pa = NULL; + } + // Separate test (if allocation of pa throws, el may be allocated. + // but no elements will be constructed.) + if(el) { + elements_array_allocator().deallocate(el, sz / 2); + el = NULL; + } + sz = INITIAL_SIZE; + ne = 0; + } + +public: + hash_buffer() : my_key(NULL), my_size(INITIAL_SIZE), nelements(0) { + internal_initialize_buffer(); + } + + ~hash_buffer() { + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + if(my_key) delete my_key; + } + hash_buffer(const hash_buffer&) = delete; + hash_buffer& operator=(const hash_buffer&) = delete; + + void reset() { + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + internal_initialize_buffer(); + } + + // Take ownership of func object allocated with new. + // This method is only used internally, so can't be misused by user. + void set_key_func(ValueToKey *vtk) { my_key = vtk; } + // pointer is used to clone() + ValueToKey* get_key_func() { return my_key; } + + bool insert_with_key(const value_type &v) { + pointer_type p = NULL; + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(find_ref_with_key((*my_key)(v), p)) { + p->~value_type(); + (void) new(p) value_type(v); // copy-construct into the space + return false; + } + ++nelements; + if(nelements*2 > my_size) grow_array(); + internal_insert_with_key(pointer_array, my_size, free_list, v); + return true; + } + + // returns true and sets v to array element if found, else returns false. 
+ bool find_ref_with_key(const Knoref& k, pointer_type &v) { + size_t i = this->hash(k) & mask(); + for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) { + pointer_type pv = reinterpret_cast<pointer_type>(&(p->first)); + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(this->equal((*my_key)(*pv), k)) { + v = pv; + return true; + } + } + return false; + } + + bool find_with_key( const Knoref& k, value_type &v) { + value_type *p; + if(find_ref_with_key(k, p)) { + v = *p; + return true; + } + else + return false; + } + + void delete_with_key(const Knoref& k) { + size_t h = this->hash(k) & mask(); + element_type* prev = NULL; + for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) { + value_type *vp = reinterpret_cast<value_type *>(&(p->first)); + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(this->equal((*my_key)(*vp), k)) { + vp->~value_type(); + if(prev) prev->second = p->second; + else pointer_array[h] = (element_type *)(p->second); + p->second = free_list; + free_list = p; + --nelements; + return; + } + } + __TBB_ASSERT(false, "key not found for delete"); + } +}; +#endif // __TBB__flow_graph_hash_buffer_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h index d8256ca8a2..be8ad53a04 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h @@ -1,364 +1,364 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef _FGT_GRAPH_TRACE_IMPL_H -#define _FGT_GRAPH_TRACE_IMPL_H - -#include "../profiling.h" -#if (_MSC_VER >= 1900) - #include <intrin.h> -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template< typename T > class sender; -template< typename T > class receiver; - -#if TBB_USE_PROFILING_TOOLS - #if __TBB_FLOW_TRACE_CODEPTR - #if (_MSC_VER >= 1900) - #define CODEPTR() (_ReturnAddress()) - #elif __TBB_GCC_VERSION >= 40800 - #define CODEPTR() ( __builtin_return_address(0)) - #else - #define CODEPTR() NULL - #endif - #else - #define CODEPTR() NULL - #endif /* __TBB_FLOW_TRACE_CODEPTR */ - -static inline void fgt_alias_port(void *node, void *p, bool visible) { - if(visible) - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE ); - else - itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE ); -} - -static inline void fgt_composite ( void* codeptr, void *node, void *graph ) { - itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE ); - suppress_unused_warning( codeptr ); -#if __TBB_FLOW_TRACE_CODEPTR - if (codeptr != NULL) { - register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); - } -#endif -} - -static inline void fgt_internal_alias_input_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT ); -} - -static inline void fgt_internal_alias_output_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT ); -} - -template<typename InputType> -void alias_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { - // TODO: Make fgt_internal_alias_input_port a function template? - fgt_internal_alias_input_port( node, port, name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_input_alias_helper { - static void alias_port( void *node, PortsTuple &ports ) { - alias_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); - fgt_internal_input_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_input_alias_helper<PortsTuple, 0> { - static void alias_port( void * /* node */, PortsTuple & /* ports */ ) { } -}; - -template<typename OutputType> -void alias_output_port(void *node, sender<OutputType>* port, string_resource_index name_index) { - // TODO: Make fgt_internal_alias_output_port a function template? 
- fgt_internal_alias_output_port( node, static_cast<void *>(port), name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_output_alias_helper { - static void alias_port( void *node, PortsTuple &ports ) { - alias_output_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); - fgt_internal_output_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_output_alias_helper<PortsTuple, 0> { - static void alias_port( void * /*node*/, PortsTuple &/*ports*/ ) { - } -}; - -static inline void fgt_internal_create_input_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); -} - -static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_resource_index name_index ) { - itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index); - suppress_unused_warning( codeptr ); -#if __TBB_FLOW_TRACE_CODEPTR - if (codeptr != NULL) { - register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); - } -#endif -} - -template<typename InputType> -void register_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { - // TODO: Make fgt_internal_create_input_port a function template? - fgt_internal_create_input_port(node, static_cast<void*>(port), name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_input_helper { - static void register_port( void *node, PortsTuple &ports ) { - register_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); - fgt_internal_input_helper<PortsTuple, N-1>::register_port( node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_input_helper<PortsTuple, 1> { - static void register_port( void *node, PortsTuple &ports ) { - register_input_port( node, &(std::get<0>(ports)), FLOW_INPUT_PORT_0 ); - } -}; - -template<typename OutputType> -void register_output_port(void* codeptr, void *node, sender<OutputType>* port, string_resource_index name_index) { - // TODO: Make fgt_internal_create_output_port a function template? 
- fgt_internal_create_output_port( codeptr, node, static_cast<void *>(port), name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_output_helper { - static void register_port( void* codeptr, void *node, PortsTuple &ports ) { - register_output_port( codeptr, node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); - fgt_internal_output_helper<PortsTuple, N-1>::register_port( codeptr, node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_output_helper<PortsTuple,1> { - static void register_port( void* codeptr, void *node, PortsTuple &ports ) { - register_output_port( codeptr, node, &(std::get<0>(ports)), FLOW_OUTPUT_PORT_0 ); - } -}; - -template< typename NodeType > -void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) { - void *addr = (void *)( static_cast< receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) ); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -template< typename NodeType > -void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) { - void *addr = const_cast<NodeType *>(node); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -template< typename NodeType > -static inline void fgt_node_desc( const NodeType *node, const char *desc ) { - void *addr = (void *)( static_cast< sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) ); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -static inline void fgt_graph_desc( const void *g, const char *desc ) { - void *addr = const_cast< void *>(g); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); -} - -static inline void fgt_body( void *node, void *body ) { - itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node(void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports ) { - itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); - fgt_internal_output_helper<PortsTuple, N>::register_port(codeptr, input_port, ports ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports, void *body ) { - itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); - fgt_internal_output_helper<PortsTuple, N>::register_port( codeptr, input_port, ports ); - fgt_body( input_port, body ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multiinput_node( void* codeptr, string_resource_index t, void *g, PortsTuple &ports, void *output_port) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); - fgt_internal_input_helper<PortsTuple, N>::register_port( output_port, ports ); -} - -static inline void fgt_multiinput_multioutput_node( void* codeptr, string_resource_index t, void *n, void *g ) { - itt_make_task_group( ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t ); - suppress_unused_warning( codeptr ); -#if 
__TBB_FLOW_TRACE_CODEPTR - if (codeptr != NULL) { - register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr); - } -#endif -} - -static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *output_port ) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); -} - -static void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *output_port, void *body ) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); - fgt_body( output_port, body ); -} - -static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port ) { - fgt_node( codeptr, t, g, output_port ); - fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); -} - -static inline void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port, void *body ) { - fgt_node_with_body( codeptr, t, g, output_port, body ); - fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); -} - - -static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *decrement_port, void *output_port ) { - fgt_node( codeptr, t, g, input_port, output_port ); - fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 ); -} - -static inline void fgt_make_edge( void *output_port, void *input_port ) { - itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); -} - -static inline void fgt_remove_edge( void *output_port, void *input_port ) { - itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); -} - -static inline void fgt_graph( void *g ) { - itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH ); -} - -static inline void fgt_begin_body( void *body ) { - itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_BODY ); -} - -static inline void fgt_end_body( void * ) { - itt_task_end( ITT_DOMAIN_FLOW ); -} - -static inline void fgt_async_try_put_begin( void *node, void *port ) { - itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); -} - -static inline void fgt_async_try_put_end( void *, void * ) { - itt_task_end( ITT_DOMAIN_FLOW ); -} - -static inline void fgt_async_reserve( void *node, void *graph ) { - itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); -} - -static inline void fgt_async_commit( void *node, void * /*graph*/) { - itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE ); -} - -static inline void fgt_reserve_wait( void *graph ) { - itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_NULL ); -} - -static inline void fgt_release_wait( void *graph ) { - itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH ); -} - -#else // TBB_USE_PROFILING_TOOLS - -#define CODEPTR() NULL - -static inline void fgt_alias_port(void * /*node*/, void * /*p*/, bool /*visible*/ ) { } - -static inline void fgt_composite ( void* /*codeptr*/, void * /*node*/, void * /*graph*/ ) { } - -static inline void fgt_graph( void * /*g*/ ) { } - -template< typename NodeType > -static inline void fgt_multioutput_node_desc( 
const NodeType * /*node*/, const char * /*desc*/ ) { } - -template< typename NodeType > -static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } - -static inline void fgt_graph_desc( const void * /*g*/, const char * /*desc*/ ) { } - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { } - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { } - -template< int N, typename PortsTuple > -static inline void fgt_multiinput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { } - -static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*node*/, void * /*graph*/ ) { } - -static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { } -static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { } - -static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { } -static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { } - -static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { } -static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { } - -static inline void fgt_begin_body( void * /*body*/ ) { } -static inline void fgt_end_body( void * /*body*/) { } - -static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { } -static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { } -static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { } -static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { } -static inline void fgt_reserve_wait( void * /*graph*/ ) { } -static inline void fgt_release_wait( void * /*graph*/ ) { } - -template< typename NodeType > -void fgt_multiinput_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } - -template < typename PortsTuple, int N > -struct fgt_internal_input_alias_helper { - static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } -}; - -template < typename PortsTuple, int N > -struct fgt_internal_output_alias_helper { - static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } -}; - -#endif // TBB_USE_PROFILING_TOOLS - -} // d1 -} // namespace detail -} // namespace tbb - -#endif // _FGT_GRAPH_TRACE_IMPL_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _FGT_GRAPH_TRACE_IMPL_H +#define _FGT_GRAPH_TRACE_IMPL_H + +#include "../profiling.h" +#if (_MSC_VER >= 1900) + #include <intrin.h> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template< typename T > class sender; +template< typename T > class receiver; + +#if TBB_USE_PROFILING_TOOLS + #if __TBB_FLOW_TRACE_CODEPTR + #if (_MSC_VER >= 1900) + #define CODEPTR() (_ReturnAddress()) + #elif __TBB_GCC_VERSION >= 40800 + #define CODEPTR() ( __builtin_return_address(0)) + #else + #define CODEPTR() NULL + #endif + #else + #define CODEPTR() NULL + #endif /* __TBB_FLOW_TRACE_CODEPTR */ + +static inline void fgt_alias_port(void *node, void *p, bool visible) { + if(visible) + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE ); + else + itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE ); +} + +static inline void fgt_composite ( void* codeptr, void *node, void *graph ) { + itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE ); + suppress_unused_warning( codeptr ); +#if __TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +static inline void fgt_internal_alias_input_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT ); +} + +static inline void fgt_internal_alias_output_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT ); +} + +template<typename InputType> +void alias_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_alias_input_port a function template? + fgt_internal_alias_input_port( node, port, name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_input_alias_helper { + static void alias_port( void *node, PortsTuple &ports ) { + alias_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); + fgt_internal_input_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_input_alias_helper<PortsTuple, 0> { + static void alias_port( void * /* node */, PortsTuple & /* ports */ ) { } +}; + +template<typename OutputType> +void alias_output_port(void *node, sender<OutputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_alias_output_port a function template? 
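An aside on the CODEPTR() macro defined near the top of this header: the pattern is to capture the return address of the current function through a compiler intrinsic when one is available, and otherwise degrade to a null pointer so callers need no conditional code. Below is a minimal standalone sketch of that idea, not TBB code; TRACE_CODEPTR and traced_node are invented names.

// Illustrative sketch (not TBB code): capture the call-site address the way
// CODEPTR() above does, falling back to a null pointer when no intrinsic exists.
#include <cstdio>

#if defined(_MSC_VER) && _MSC_VER >= 1900
    #include <intrin.h>
    #define TRACE_CODEPTR() (_ReturnAddress())
#elif defined(__GNUC__)
    #define TRACE_CODEPTR() (__builtin_return_address(0))
#else
    #define TRACE_CODEPTR() nullptr
#endif

struct traced_node {
    void* codeptr;
    // codeptr holds the address this constructor returns to, i.e. a location in
    // the code that constructed the node (assuming the constructor is not inlined).
    traced_node() : codeptr(TRACE_CODEPTR()) {}
};

int main() {
    traced_node n;                                    // captured address lies inside main()
    std::printf("constructed from %p\n", n.codeptr);  // a profiler would attach this to the node
}

In the header above the same capture is additionally gated on __TBB_FLOW_TRACE_CODEPTR, so it can be compiled out entirely.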
+ fgt_internal_alias_output_port( node, static_cast<void *>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_output_alias_helper { + static void alias_port( void *node, PortsTuple &ports ) { + alias_output_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); + fgt_internal_output_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_output_alias_helper<PortsTuple, 0> { + static void alias_port( void * /*node*/, PortsTuple &/*ports*/ ) { + } +}; + +static inline void fgt_internal_create_input_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); +} + +static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_resource_index name_index ) { + itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index); + suppress_unused_warning( codeptr ); +#if __TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +template<typename InputType> +void register_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_create_input_port a function template? + fgt_internal_create_input_port(node, static_cast<void*>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_input_helper { + static void register_port( void *node, PortsTuple &ports ) { + register_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); + fgt_internal_input_helper<PortsTuple, N-1>::register_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_input_helper<PortsTuple, 1> { + static void register_port( void *node, PortsTuple &ports ) { + register_input_port( node, &(std::get<0>(ports)), FLOW_INPUT_PORT_0 ); + } +}; + +template<typename OutputType> +void register_output_port(void* codeptr, void *node, sender<OutputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_create_output_port a function template? 
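The fgt_internal_input_helper / fgt_internal_output_helper structs above walk a tuple of ports with template recursion: the primary template handles element N-1 and recurses, and a specialization terminates the recursion. The following standalone sketch shows the same pattern; visit_ports_helper and port are invented names, not TBB code.

// Illustrative sketch (not TBB code) of the recursive tuple-walking helper.
#include <cstdio>
#include <tuple>

struct port { const char* label; };

template <typename PortsTuple, int N>
struct visit_ports_helper {
    static void apply(PortsTuple& ports) {
        // Handle port N-1, then recurse towards the front of the tuple.
        std::printf("registering port %d (%s)\n", N - 1, std::get<N - 1>(ports).label);
        visit_ports_helper<PortsTuple, N - 1>::apply(ports);
    }
};

// Base case stops the recursion, mirroring the <PortsTuple, 1> specialization
// of fgt_internal_input_helper above.
template <typename PortsTuple>
struct visit_ports_helper<PortsTuple, 1> {
    static void apply(PortsTuple& ports) {
        std::printf("registering port 0 (%s)\n", std::get<0>(ports).label);
    }
};

int main() {
    auto ports = std::make_tuple(port{"in"}, port{"mid"}, port{"out"});
    visit_ports_helper<decltype(ports), 3>::apply(ports);  // visits ports 2, 1, 0
}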
+ fgt_internal_create_output_port( codeptr, node, static_cast<void *>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_output_helper { + static void register_port( void* codeptr, void *node, PortsTuple &ports ) { + register_output_port( codeptr, node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); + fgt_internal_output_helper<PortsTuple, N-1>::register_port( codeptr, node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_output_helper<PortsTuple,1> { + static void register_port( void* codeptr, void *node, PortsTuple &ports ) { + register_output_port( codeptr, node, &(std::get<0>(ports)), FLOW_OUTPUT_PORT_0 ); + } +}; + +template< typename NodeType > +void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) { + void *addr = (void *)( static_cast< receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) ); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +template< typename NodeType > +void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) { + void *addr = const_cast<NodeType *>(node); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +template< typename NodeType > +static inline void fgt_node_desc( const NodeType *node, const char *desc ) { + void *addr = (void *)( static_cast< sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) ); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +static inline void fgt_graph_desc( const void *g, const char *desc ) { + void *addr = const_cast< void *>(g); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); +} + +static inline void fgt_body( void *node, void *body ) { + itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node(void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports ) { + itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); + fgt_internal_output_helper<PortsTuple, N>::register_port(codeptr, input_port, ports ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports, void *body ) { + itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); + fgt_internal_output_helper<PortsTuple, N>::register_port( codeptr, input_port, ports ); + fgt_body( input_port, body ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multiinput_node( void* codeptr, string_resource_index t, void *g, PortsTuple &ports, void *output_port) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); + fgt_internal_input_helper<PortsTuple, N>::register_port( output_port, ports ); +} + +static inline void fgt_multiinput_multioutput_node( void* codeptr, string_resource_index t, void *n, void *g ) { + itt_make_task_group( ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t ); + suppress_unused_warning( codeptr ); +#if 
__TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *output_port ) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); +} + +static void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *output_port, void *body ) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); + fgt_body( output_port, body ); +} + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port ) { + fgt_node( codeptr, t, g, output_port ); + fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); +} + +static inline void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port, void *body ) { + fgt_node_with_body( codeptr, t, g, output_port, body ); + fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); +} + + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *decrement_port, void *output_port ) { + fgt_node( codeptr, t, g, input_port, output_port ); + fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 ); +} + +static inline void fgt_make_edge( void *output_port, void *input_port ) { + itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); +} + +static inline void fgt_remove_edge( void *output_port, void *input_port ) { + itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); +} + +static inline void fgt_graph( void *g ) { + itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH ); +} + +static inline void fgt_begin_body( void *body ) { + itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_BODY ); +} + +static inline void fgt_end_body( void * ) { + itt_task_end( ITT_DOMAIN_FLOW ); +} + +static inline void fgt_async_try_put_begin( void *node, void *port ) { + itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); +} + +static inline void fgt_async_try_put_end( void *, void * ) { + itt_task_end( ITT_DOMAIN_FLOW ); +} + +static inline void fgt_async_reserve( void *node, void *graph ) { + itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); +} + +static inline void fgt_async_commit( void *node, void * /*graph*/) { + itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE ); +} + +static inline void fgt_reserve_wait( void *graph ) { + itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_NULL ); +} + +static inline void fgt_release_wait( void *graph ) { + itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH ); +} + +#else // TBB_USE_PROFILING_TOOLS + +#define CODEPTR() NULL + +static inline void fgt_alias_port(void * /*node*/, void * /*p*/, bool /*visible*/ ) { } + +static inline void fgt_composite ( void* /*codeptr*/, void * /*node*/, void * /*graph*/ ) { } + +static inline void fgt_graph( void * /*g*/ ) { } + +template< typename NodeType > +static inline void fgt_multioutput_node_desc( 
const NodeType * /*node*/, const char * /*desc*/ ) { } + +template< typename NodeType > +static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } + +static inline void fgt_graph_desc( const void * /*g*/, const char * /*desc*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multiinput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { } + +static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*node*/, void * /*graph*/ ) { } + +static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { } +static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { } + +static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { } +static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { } + +static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { } +static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { } + +static inline void fgt_begin_body( void * /*body*/ ) { } +static inline void fgt_end_body( void * /*body*/) { } + +static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { } +static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { } +static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { } +static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { } +static inline void fgt_reserve_wait( void * /*graph*/ ) { } +static inline void fgt_release_wait( void * /*graph*/ ) { } + +template< typename NodeType > +void fgt_multiinput_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } + +template < typename PortsTuple, int N > +struct fgt_internal_input_alias_helper { + static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } +}; + +template < typename PortsTuple, int N > +struct fgt_internal_output_alias_helper { + static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } +}; + +#endif // TBB_USE_PROFILING_TOOLS + +} // d1 +} // namespace detail +} // namespace tbb + +#endif // _FGT_GRAPH_TRACE_IMPL_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h index 97c770b154..e00dd14210 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h @@ -1,407 +1,407 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_types_impl_H -#define __TBB__flow_graph_types_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::detail::d1 - -// the change to key_matching (adding a K and KHash template parameter, making it a class) -// means we have to pass this data to the key_matching_port. All the ports have only one -// template parameter, so we have to wrap the following types in a trait: -// -// . K == key_type -// . KHash == hash and compare for Key -// . TtoK == function_body that given an object of T, returns its K -// . T == type accepted by port, and stored in the hash table -// -// The port will have an additional parameter on node construction, which is a function_body -// that accepts a const T& and returns a K which is the field in T which is its K. -template<typename Kp, typename KHashp, typename Tp> -struct KeyTrait { - typedef Kp K; - typedef Tp T; - typedef type_to_key_function_body<T,K> TtoK; - typedef KHashp KHash; -}; - -// wrap each element of a tuple in a template, and make a tuple of the result. -template<int N, template<class> class PT, typename TypeTuple> -struct wrap_tuple_elements; - -// A wrapper that generates the traits needed for each port of a key-matching join, -// and the type of the tuple of input ports. -template<int N, template<class> class PT, typename KeyTraits, typename TypeTuple> -struct wrap_key_tuple_elements; - -template<int N, template<class> class PT, typename... Args> -struct wrap_tuple_elements<N, PT, std::tuple<Args...> >{ - typedef typename std::tuple<PT<Args>... > type; -}; - -template<int N, template<class> class PT, typename KeyTraits, typename... Args> -struct wrap_key_tuple_elements<N, PT, KeyTraits, std::tuple<Args...> > { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef typename std::tuple<PT<KeyTrait<K, KHash, Args> >... > type; -}; - -template< int... S > class sequence {}; - -template< int N, int... S > -struct make_sequence : make_sequence < N - 1, N - 1, S... > {}; - -template< int... S > -struct make_sequence < 0, S... > { - typedef sequence<S...> type; -}; - -//! type mimicking std::pair but with trailing fill to ensure each element of an array -//* will have the correct alignment -template<typename T1, typename T2, size_t REM> -struct type_plus_align { - char first[sizeof(T1)]; - T2 second; - char fill1[REM]; -}; - -template<typename T1, typename T2> -struct type_plus_align<T1,T2,0> { - char first[sizeof(T1)]; - T2 second; -}; - -template<class U> struct alignment_of { - typedef struct { char t; U padded; } test_alignment; - static const size_t value = sizeof(test_alignment) - sizeof(U); -}; - -// T1, T2 are actual types stored. The space defined for T1 in the type returned -// is a char array of the correct size. Type T2 should be trivially-constructible, -// T1 must be explicitly managed. 
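A quick aside on the alignment_of helper just defined and used by aligned_pair below: placing a char in front of U forces the compiler to pad up to U's alignment, so sizeof(test_alignment) - sizeof(U) recovers that alignment. The compile-time check below is illustrative only (not TBB code); sample is an invented type, and the comparison against the built-in alignof is the part added here.

// Standalone check (not TBB code) of the alignment_of padding trick.
#include <cstddef>

template <class U>
struct alignment_of {
    typedef struct { char t; U padded; } test_alignment;
    static const std::size_t value = sizeof(test_alignment) - sizeof(U);
};

struct sample { int i; char c; };   // an arbitrary aggregate for the demo

static_assert(alignment_of<int>::value    == alignof(int),    "agrees with alignof for int");
static_assert(alignment_of<sample>::value == alignof(sample), "agrees with alignof for sample");

int main() {}   // nothing to run; the checks happen at compile time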
-template<typename T1, typename T2> -struct aligned_pair { - static const size_t t1_align = alignment_of<T1>::value; - static const size_t t2_align = alignment_of<T2>::value; - typedef type_plus_align<T1, T2, 0 > just_pair; - static const size_t max_align = t1_align < t2_align ? t2_align : t1_align; - static const size_t extra_bytes = sizeof(just_pair) % max_align; - static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0; -public: - typedef type_plus_align<T1,T2,remainder> type; -}; // aligned_pair - -// support for variant type -// type we use when we're not storing a value -struct default_constructed { }; - -// type which contains another type, tests for what type is contained, and references to it. -// Wrapper<T> -// void CopyTo( void *newSpace) : builds a Wrapper<T> copy of itself in newSpace - -// struct to allow us to copy and test the type of objects -struct WrapperBase { - virtual ~WrapperBase() {} - virtual void CopyTo(void* /*newSpace*/) const = 0; -}; - -// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be -// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be -// examined via value(), but not modified. -template<typename T> -struct Wrapper: public WrapperBase { - typedef T value_type; - typedef T* pointer_type; -private: - T value_space; -public: - const value_type &value() const { return value_space; } - -private: - Wrapper(); - - // on exception will ensure the Wrapper will contain only a trivially-constructed object - struct _unwind_space { - pointer_type space; - _unwind_space(pointer_type p) : space(p) {} - ~_unwind_space() { - if(space) (void) new (space) Wrapper<default_constructed>(default_constructed()); - } - }; -public: - explicit Wrapper( const T& other ) : value_space(other) { } - explicit Wrapper(const Wrapper& other) = delete; - - void CopyTo(void* newSpace) const override { - _unwind_space guard((pointer_type)newSpace); - (void) new(newSpace) Wrapper(value_space); - guard.space = NULL; - } - ~Wrapper() { } -}; - -// specialization for array objects -template<typename T, size_t N> -struct Wrapper<T[N]> : public WrapperBase { - typedef T value_type; - typedef T* pointer_type; - // space must be untyped. - typedef T ArrayType[N]; -private: - // The space is not of type T[N] because when copy-constructing, it would be - // default-initialized and then copied to in some fashion, resulting in two - // constructions and one destruction per element. If the type is char[ ], we - // placement new into each element, resulting in one construction per element. - static const size_t space_size = sizeof(ArrayType) / sizeof(char); - char value_space[space_size]; - - - // on exception will ensure the already-built objects will be destructed - // (the value_space is a char array, so it is already trivially-destructible.) 
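As an aside, the _unwind_class guard that follows implements the usual exception-safety idiom for element-by-element placement new: count how many elements were constructed and, on unwinding, destroy exactly those in reverse order. A self-contained sketch of the idiom (not TBB code; may_throw is an invented type):

// Illustrative sketch (not TBB code): roll back partially built array storage.
#include <cstddef>
#include <cstdio>
#include <new>
#include <stdexcept>

struct may_throw {
    int v;
    explicit may_throw(int x) : v(x) {
        if (x == 3) throw std::runtime_error("construction failed");  // simulate a failure
    }
    ~may_throw() { std::printf("destroying %d\n", v); }
};

int main() {
    alignas(may_throw) unsigned char raw[4 * sizeof(may_throw)];  // untyped storage
    may_throw* base = reinterpret_cast<may_throw*>(raw);
    std::size_t built = 0;                    // plays the role of already_built
    try {
        for (std::size_t i = 0; i < 4; ++i) {
            ::new (static_cast<void*>(base + i)) may_throw(static_cast<int>(i));
            ++built;                          // counted only after a successful construction
        }
    } catch (...) {
        // Unwind in reverse order, touching only the elements that were built.
        for (std::size_t i = built; i > 0; --i) base[i - 1].~may_throw();
        std::printf("rolled back %zu elements\n", built);
        return 0;
    }
    for (std::size_t i = 4; i > 0; --i) base[i - 1].~may_throw();  // normal cleanup
}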
- struct _unwind_class { - pointer_type space; - int already_built; - _unwind_class(pointer_type p) : space(p), already_built(0) {} - ~_unwind_class() { - if(space) { - for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type(); - (void) new(space) Wrapper<default_constructed>(default_constructed()); - } - } - }; -public: - const ArrayType &value() const { - char *vp = const_cast<char *>(value_space); - return reinterpret_cast<ArrayType &>(*vp); - } - -private: - Wrapper(); -public: - // have to explicitly construct because other decays to a const value_type* - explicit Wrapper(const ArrayType& other) { - _unwind_class guard((pointer_type)value_space); - pointer_type vp = reinterpret_cast<pointer_type>(&value_space); - for(size_t i = 0; i < N; ++i ) { - (void) new(vp++) value_type(other[i]); - ++(guard.already_built); - } - guard.space = NULL; - } - explicit Wrapper(const Wrapper& other) : WrapperBase() { - // we have to do the heavy lifting to copy contents - _unwind_class guard((pointer_type)value_space); - pointer_type dp = reinterpret_cast<pointer_type>(value_space); - pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space)); - for(size_t i = 0; i < N; ++i, ++dp, ++sp) { - (void) new(dp) value_type(*sp); - ++(guard.already_built); - } - guard.space = NULL; - } - - void CopyTo(void* newSpace) const override { - (void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor - } - - ~Wrapper() { - // have to destroy explicitly in reverse order - pointer_type vp = reinterpret_cast<pointer_type>(&value_space); - for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type(); - } -}; - -// given a tuple, return the type of the element that has the maximum alignment requirement. -// Given a tuple and that type, return the number of elements of the object with the max -// alignment requirement that is at least as big as the largest object in the tuple. - -template<bool, class T1, class T2> struct pick_one; -template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; }; -template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; }; - -template< template<class> class Selector, typename T1, typename T2 > -struct pick_max { - typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type; -}; - -template<typename T> struct size_of { static const int value = sizeof(T); }; - -template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max { - typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType; - typedef typename std::tuple_element<N-1, Tuple>::type ThisType; - typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type; -}; - -template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> { - typedef typename std::tuple_element<0, Tuple>::type type; -}; - -// is the specified type included in a tuple? -template<class Q, size_t N, class Tuple> -struct is_element_of { - typedef typename std::tuple_element<N-1, Tuple>::type T_i; - static const bool value = std::is_same<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value; -}; - -template<class Q, class Tuple> -struct is_element_of<Q,0,Tuple> { - typedef typename std::tuple_element<0, Tuple>::type T_i; - static const bool value = std::is_same<Q,T_i>::value; -}; - -// allow the construction of types that are listed tuple. If a disallowed type -// construction is written, a method involving this type is created. 
The -// type has no definition, so a syntax error is generated. -template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple; - -template<typename T, bool BUILD_IT> struct do_if; -template<typename T> -struct do_if<T, true> { - static void construct(void *mySpace, const T& x) { - (void) new(mySpace) Wrapper<T>(x); - } -}; -template<typename T> -struct do_if<T, false> { - static void construct(void * /*mySpace*/, const T& x) { - // This method is instantiated when the type T does not match any of the - // element types in the Tuple in variant<Tuple>. - ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x); - } -}; - -// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in -// Wrapper, and how big Wrapper is. -// -// the object can only be tested for type, and a read-only reference can be fetched by cast_to<T>(). - -using tbb::detail::punned_cast; -struct tagged_null_type {}; -template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type, - typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type, - typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type> -class tagged_msg { - typedef std::tuple<T0, T1, T2, T3, T4 - //TODO: Should we reject lists longer than a tuple can hold? - #if __TBB_VARIADIC_MAX >= 6 - , T5 - #endif - #if __TBB_VARIADIC_MAX >= 7 - , T6 - #endif - #if __TBB_VARIADIC_MAX >= 8 - , T7 - #endif - #if __TBB_VARIADIC_MAX >= 9 - , T8 - #endif - #if __TBB_VARIADIC_MAX >= 10 - , T9 - #endif - > Tuple; - -private: - class variant { - static const size_t N = std::tuple_size<Tuple>::value; - typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType; - typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType; - static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1); - static const size_t MaxNElements = MaxNBytes/sizeof(AlignType); - typedef aligned_space<AlignType, MaxNElements> SpaceType; - SpaceType my_space; - static const size_t MaxSize = sizeof(SpaceType); - - public: - variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); } - - template<typename T> - variant( const T& x ) { - do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x); - } - - variant(const variant& other) { - const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space)); - h->CopyTo(&my_space); - } - - // assignment must destroy and re-create the Wrapper type, as there is no way - // to create a Wrapper-to-Wrapper assign even if we find they agree in type. 
- void operator=( const variant& rhs ) { - if(&rhs != this) { - WrapperBase *h = punned_cast<WrapperBase *>(&my_space); - h->~WrapperBase(); - const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space)); - ch->CopyTo(&my_space); - } - } - - template<typename U> - const U& variant_cast_to() const { - const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)); - if(!h) { - throw_exception(exception_id::bad_tagged_msg_cast); - } - return h->value(); - } - template<typename U> - bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != NULL; } - - bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();} - - ~variant() { - WrapperBase *h = punned_cast<WrapperBase *>(&my_space); - h->~WrapperBase(); - } - }; //class variant - - TagType my_tag; - variant my_msg; - -public: - tagged_msg(): my_tag(TagType(~0)), my_msg(){} - - template<typename T, typename R> - tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {} - - template<typename T, typename R, size_t N> - tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {} - - void set_tag(TagType const &index) {my_tag = index;} - TagType tag() const {return my_tag;} - - template<typename V> - const V& cast_to() const {return my_msg.template variant_cast_to<V>();} - - template<typename V> - bool is_a() const {return my_msg.template variant_is_a<V>();} - - bool is_default_constructed() const {return my_msg.variant_is_default_constructed();} -}; //class tagged_msg - -// template to simplify cast and test for tagged_msg in template contexts -template<typename V, typename T> -const V& cast_to(T const &t) { return t.template cast_to<V>(); } - -template<typename V, typename T> -bool is_a(T const &t) { return t.template is_a<V>(); } - -enum op_stat { WAIT = 0, SUCCEEDED, FAILED }; - -#endif /* __TBB__flow_graph_types_impl_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_types_impl_H +#define __TBB__flow_graph_types_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 + +// the change to key_matching (adding a K and KHash template parameter, making it a class) +// means we have to pass this data to the key_matching_port. All the ports have only one +// template parameter, so we have to wrap the following types in a trait: +// +// . K == key_type +// . KHash == hash and compare for Key +// . TtoK == function_body that given an object of T, returns its K +// . T == type accepted by port, and stored in the hash table +// +// The port will have an additional parameter on node construction, which is a function_body +// that accepts a const T& and returns a K which is the field in T which is its K. 
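To make the motivation above concrete: a class template that is constrained to a single type parameter can still be handed a key type, a hasher and a stored type if they are bundled into one trait, which is what KeyTrait below does for the key_matching ports. The sketch that follows is illustrative only, not TBB code; demo_traits, record and demo_port are invented names.

// Illustrative sketch (not TBB code): one trait parameter delivers several types.
#include <cstdio>
#include <functional>
#include <string>

struct record {
    int         id;
    std::string payload;
};

// The single "trait" parameter carries everything the port needs to know.
struct demo_traits {
    typedef int            K;       // key type
    typedef std::hash<int> KHash;   // how to hash the key
    typedef record         T;       // what the port actually stores
};

template <typename Traits>
class demo_port {
    typedef typename Traits::K     key_type;
    typedef typename Traits::T     value_type;
    typedef typename Traits::KHash hasher;
public:
    void put(const value_type& v) {
        key_type k = v.id;           // a real port would use a user-supplied key extractor (TtoK)
        std::printf("stored item with key %d, hash bucket hint %zu\n",
                    k, hasher{}(k) % 8);
    }
};

int main() {
    demo_port<demo_traits> port;     // one template argument, three types delivered
    port.put(record{42, "hello"});
}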
+template<typename Kp, typename KHashp, typename Tp> +struct KeyTrait { + typedef Kp K; + typedef Tp T; + typedef type_to_key_function_body<T,K> TtoK; + typedef KHashp KHash; +}; + +// wrap each element of a tuple in a template, and make a tuple of the result. +template<int N, template<class> class PT, typename TypeTuple> +struct wrap_tuple_elements; + +// A wrapper that generates the traits needed for each port of a key-matching join, +// and the type of the tuple of input ports. +template<int N, template<class> class PT, typename KeyTraits, typename TypeTuple> +struct wrap_key_tuple_elements; + +template<int N, template<class> class PT, typename... Args> +struct wrap_tuple_elements<N, PT, std::tuple<Args...> >{ + typedef typename std::tuple<PT<Args>... > type; +}; + +template<int N, template<class> class PT, typename KeyTraits, typename... Args> +struct wrap_key_tuple_elements<N, PT, KeyTraits, std::tuple<Args...> > { + typedef typename KeyTraits::key_type K; + typedef typename KeyTraits::hash_compare_type KHash; + typedef typename std::tuple<PT<KeyTrait<K, KHash, Args> >... > type; +}; + +template< int... S > class sequence {}; + +template< int N, int... S > +struct make_sequence : make_sequence < N - 1, N - 1, S... > {}; + +template< int... S > +struct make_sequence < 0, S... > { + typedef sequence<S...> type; +}; + +//! type mimicking std::pair but with trailing fill to ensure each element of an array +//* will have the correct alignment +template<typename T1, typename T2, size_t REM> +struct type_plus_align { + char first[sizeof(T1)]; + T2 second; + char fill1[REM]; +}; + +template<typename T1, typename T2> +struct type_plus_align<T1,T2,0> { + char first[sizeof(T1)]; + T2 second; +}; + +template<class U> struct alignment_of { + typedef struct { char t; U padded; } test_alignment; + static const size_t value = sizeof(test_alignment) - sizeof(U); +}; + +// T1, T2 are actual types stored. The space defined for T1 in the type returned +// is a char array of the correct size. Type T2 should be trivially-constructible, +// T1 must be explicitly managed. +template<typename T1, typename T2> +struct aligned_pair { + static const size_t t1_align = alignment_of<T1>::value; + static const size_t t2_align = alignment_of<T2>::value; + typedef type_plus_align<T1, T2, 0 > just_pair; + static const size_t max_align = t1_align < t2_align ? t2_align : t1_align; + static const size_t extra_bytes = sizeof(just_pair) % max_align; + static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0; +public: + typedef type_plus_align<T1,T2,remainder> type; +}; // aligned_pair + +// support for variant type +// type we use when we're not storing a value +struct default_constructed { }; + +// type which contains another type, tests for what type is contained, and references to it. +// Wrapper<T> +// void CopyTo( void *newSpace) : builds a Wrapper<T> copy of itself in newSpace + +// struct to allow us to copy and test the type of objects +struct WrapperBase { + virtual ~WrapperBase() {} + virtual void CopyTo(void* /*newSpace*/) const = 0; +}; + +// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be +// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be +// examined via value(), but not modified. 
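An illustrative reduction of the WrapperBase / Wrapper<T> scheme described above (not TBB code): a virtual CopyTo() rebuilds the wrapped value into another buffer by placement new, and dynamic_cast answers which T is stored. The real code adds exception-unwind guards, punned_cast and tuple-derived buffer sizing; the sketch below keeps only the core idea, and small_variant is an invented holder sized for a single std::string.

// Illustrative sketch (not TBB code) of buffer-based type erasure with CopyTo().
#include <cstdio>
#include <new>
#include <string>

struct WrapperBase {
    virtual ~WrapperBase() {}
    virtual void CopyTo(void* space) const = 0;
};

template <typename T>
struct Wrapper : WrapperBase {
    T value;
    explicit Wrapper(const T& v) : value(v) {}
    void CopyTo(void* space) const override { ::new (space) Wrapper(value); }
};

class small_variant {
    alignas(Wrapper<std::string>) unsigned char space[sizeof(Wrapper<std::string>)];
    WrapperBase*       base()       { return reinterpret_cast<WrapperBase*>(space); }
    const WrapperBase* base() const { return reinterpret_cast<const WrapperBase*>(space); }
public:
    template <typename T>
    explicit small_variant(const T& v) {
        static_assert(sizeof(Wrapper<T>) <= sizeof(space), "type too large for the buffer");
        ::new (static_cast<void*>(space)) Wrapper<T>(v);
    }
    small_variant(const small_variant& other) { other.base()->CopyTo(space); }
    small_variant& operator=(const small_variant&) = delete;
    ~small_variant() { base()->~WrapperBase(); }

    template <typename T>
    bool is_a() const { return dynamic_cast<const Wrapper<T>*>(base()) != nullptr; }
    template <typename T>
    const T& cast_to() const {            // throws std::bad_cast on a type mismatch
        return dynamic_cast<const Wrapper<T>&>(*base()).value;
    }
};

int main() {
    small_variant v(std::string("hello"));
    small_variant copy(v);                // copied via the virtual CopyTo()
    std::printf("is string: %d, is int: %d\n",
                int(copy.is_a<std::string>()), int(copy.is_a<int>()));
    std::printf("value: %s\n", copy.cast_to<std::string>().c_str());
}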
+template<typename T> +struct Wrapper: public WrapperBase { + typedef T value_type; + typedef T* pointer_type; +private: + T value_space; +public: + const value_type &value() const { return value_space; } + +private: + Wrapper(); + + // on exception will ensure the Wrapper will contain only a trivially-constructed object + struct _unwind_space { + pointer_type space; + _unwind_space(pointer_type p) : space(p) {} + ~_unwind_space() { + if(space) (void) new (space) Wrapper<default_constructed>(default_constructed()); + } + }; +public: + explicit Wrapper( const T& other ) : value_space(other) { } + explicit Wrapper(const Wrapper& other) = delete; + + void CopyTo(void* newSpace) const override { + _unwind_space guard((pointer_type)newSpace); + (void) new(newSpace) Wrapper(value_space); + guard.space = NULL; + } + ~Wrapper() { } +}; + +// specialization for array objects +template<typename T, size_t N> +struct Wrapper<T[N]> : public WrapperBase { + typedef T value_type; + typedef T* pointer_type; + // space must be untyped. + typedef T ArrayType[N]; +private: + // The space is not of type T[N] because when copy-constructing, it would be + // default-initialized and then copied to in some fashion, resulting in two + // constructions and one destruction per element. If the type is char[ ], we + // placement new into each element, resulting in one construction per element. + static const size_t space_size = sizeof(ArrayType) / sizeof(char); + char value_space[space_size]; + + + // on exception will ensure the already-built objects will be destructed + // (the value_space is a char array, so it is already trivially-destructible.) + struct _unwind_class { + pointer_type space; + int already_built; + _unwind_class(pointer_type p) : space(p), already_built(0) {} + ~_unwind_class() { + if(space) { + for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type(); + (void) new(space) Wrapper<default_constructed>(default_constructed()); + } + } + }; +public: + const ArrayType &value() const { + char *vp = const_cast<char *>(value_space); + return reinterpret_cast<ArrayType &>(*vp); + } + +private: + Wrapper(); +public: + // have to explicitly construct because other decays to a const value_type* + explicit Wrapper(const ArrayType& other) { + _unwind_class guard((pointer_type)value_space); + pointer_type vp = reinterpret_cast<pointer_type>(&value_space); + for(size_t i = 0; i < N; ++i ) { + (void) new(vp++) value_type(other[i]); + ++(guard.already_built); + } + guard.space = NULL; + } + explicit Wrapper(const Wrapper& other) : WrapperBase() { + // we have to do the heavy lifting to copy contents + _unwind_class guard((pointer_type)value_space); + pointer_type dp = reinterpret_cast<pointer_type>(value_space); + pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space)); + for(size_t i = 0; i < N; ++i, ++dp, ++sp) { + (void) new(dp) value_type(*sp); + ++(guard.already_built); + } + guard.space = NULL; + } + + void CopyTo(void* newSpace) const override { + (void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor + } + + ~Wrapper() { + // have to destroy explicitly in reverse order + pointer_type vp = reinterpret_cast<pointer_type>(&value_space); + for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type(); + } +}; + +// given a tuple, return the type of the element that has the maximum alignment requirement. 
+// Given a tuple and that type, return the number of elements of the object with the max +// alignment requirement that is at least as big as the largest object in the tuple. + +template<bool, class T1, class T2> struct pick_one; +template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; }; +template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; }; + +template< template<class> class Selector, typename T1, typename T2 > +struct pick_max { + typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type; +}; + +template<typename T> struct size_of { static const int value = sizeof(T); }; + +template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max { + typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType; + typedef typename std::tuple_element<N-1, Tuple>::type ThisType; + typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type; +}; + +template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> { + typedef typename std::tuple_element<0, Tuple>::type type; +}; + +// is the specified type included in a tuple? +template<class Q, size_t N, class Tuple> +struct is_element_of { + typedef typename std::tuple_element<N-1, Tuple>::type T_i; + static const bool value = std::is_same<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value; +}; + +template<class Q, class Tuple> +struct is_element_of<Q,0,Tuple> { + typedef typename std::tuple_element<0, Tuple>::type T_i; + static const bool value = std::is_same<Q,T_i>::value; +}; + +// allow the construction of types that are listed tuple. If a disallowed type +// construction is written, a method involving this type is created. The +// type has no definition, so a syntax error is generated. +template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple; + +template<typename T, bool BUILD_IT> struct do_if; +template<typename T> +struct do_if<T, true> { + static void construct(void *mySpace, const T& x) { + (void) new(mySpace) Wrapper<T>(x); + } +}; +template<typename T> +struct do_if<T, false> { + static void construct(void * /*mySpace*/, const T& x) { + // This method is instantiated when the type T does not match any of the + // element types in the Tuple in variant<Tuple>. + ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x); + } +}; + +// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in +// Wrapper, and how big Wrapper is. +// +// the object can only be tested for type, and a read-only reference can be fetched by cast_to<T>(). + +using tbb::detail::punned_cast; +struct tagged_null_type {}; +template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type, + typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type, + typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type> +class tagged_msg { + typedef std::tuple<T0, T1, T2, T3, T4 + //TODO: Should we reject lists longer than a tuple can hold? 
+ #if __TBB_VARIADIC_MAX >= 6 + , T5 + #endif + #if __TBB_VARIADIC_MAX >= 7 + , T6 + #endif + #if __TBB_VARIADIC_MAX >= 8 + , T7 + #endif + #if __TBB_VARIADIC_MAX >= 9 + , T8 + #endif + #if __TBB_VARIADIC_MAX >= 10 + , T9 + #endif + > Tuple; + +private: + class variant { + static const size_t N = std::tuple_size<Tuple>::value; + typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType; + typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType; + static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1); + static const size_t MaxNElements = MaxNBytes/sizeof(AlignType); + typedef aligned_space<AlignType, MaxNElements> SpaceType; + SpaceType my_space; + static const size_t MaxSize = sizeof(SpaceType); + + public: + variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); } + + template<typename T> + variant( const T& x ) { + do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x); + } + + variant(const variant& other) { + const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space)); + h->CopyTo(&my_space); + } + + // assignment must destroy and re-create the Wrapper type, as there is no way + // to create a Wrapper-to-Wrapper assign even if we find they agree in type. + void operator=( const variant& rhs ) { + if(&rhs != this) { + WrapperBase *h = punned_cast<WrapperBase *>(&my_space); + h->~WrapperBase(); + const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space)); + ch->CopyTo(&my_space); + } + } + + template<typename U> + const U& variant_cast_to() const { + const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)); + if(!h) { + throw_exception(exception_id::bad_tagged_msg_cast); + } + return h->value(); + } + template<typename U> + bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != NULL; } + + bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();} + + ~variant() { + WrapperBase *h = punned_cast<WrapperBase *>(&my_space); + h->~WrapperBase(); + } + }; //class variant + + TagType my_tag; + variant my_msg; + +public: + tagged_msg(): my_tag(TagType(~0)), my_msg(){} + + template<typename T, typename R> + tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {} + + template<typename T, typename R, size_t N> + tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {} + + void set_tag(TagType const &index) {my_tag = index;} + TagType tag() const {return my_tag;} + + template<typename V> + const V& cast_to() const {return my_msg.template variant_cast_to<V>();} + + template<typename V> + bool is_a() const {return my_msg.template variant_is_a<V>();} + + bool is_default_constructed() const {return my_msg.variant_is_default_constructed();} +}; //class tagged_msg + +// template to simplify cast and test for tagged_msg in template contexts +template<typename V, typename T> +const V& cast_to(T const &t) { return t.template cast_to<V>(); } + +template<typename V, typename T> +bool is_a(T const &t) { return t.template is_a<V>(); } + +enum op_stat { WAIT = 0, SUCCEEDED, FAILED }; + +#endif /* __TBB__flow_graph_types_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h index 20cbd96c06..1c38b0dc2d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h +++ 
b/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h @@ -1,127 +1,127 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__hash_compare_H -#define __TBB_detail__hash_compare_H - -#include <functional> - -#include "_containers_helpers.h" - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Key, typename Hash, typename KeyEqual> -class hash_compare { - using is_transparent_hash = has_transparent_key_equal<Key, Hash, KeyEqual>; -public: - using hasher = Hash; - using key_equal = typename is_transparent_hash::type; - - hash_compare() = default; - hash_compare( hasher hash, key_equal equal ) : my_hasher(hash), my_equal(equal) {} - - std::size_t operator()( const Key& key ) const { - return std::size_t(my_hasher(key)); - } - - bool operator()( const Key& key1, const Key& key2 ) const { - return my_equal(key1, key2); - } - - template <typename K, typename = typename std::enable_if<is_transparent_hash::value, K>::type> - std::size_t operator()( const K& key ) const { - return std::size_t(my_hasher(key)); - } - - template <typename K1, typename K2, typename = typename std::enable_if<is_transparent_hash::value, K1>::type> - bool operator()( const K1& key1, const K2& key2 ) const { - return my_equal(key1, key2); - } - - hasher hash_function() const { - return my_hasher; - } - - key_equal key_eq() const { - return my_equal; - } - - -private: - hasher my_hasher; - key_equal my_equal; -}; // class hash_compare - -//! 
hash_compare that is default argument for concurrent_hash_map -template <typename Key> -class tbb_hash_compare { -public: - std::size_t hash( const Key& a ) const { return my_hash_func(a); } - bool equal( const Key& a, const Key& b ) const { return my_key_equal(a, b); } -private: - std::hash<Key> my_hash_func; - std::equal_to<Key> my_key_equal; -}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#if TBB_DEFINE_STD_HASH_SPECIALIZATIONS - -namespace std { - -template <typename T, typename U> -struct hash<std::pair<T, U>> { -public: - std::size_t operator()( const std::pair<T, U>& p ) const { - return first_hash(p.first) ^ second_hash(p.second); - } - -private: - std::hash<T> first_hash; - std::hash<U> second_hash; -}; // struct hash<std::pair> - -// Apple clang and MSVC defines their own specializations for std::hash<std::basic_string<T, Traits, Alloc>> -#if !(_LIBCPP_VERSION) && !(_CPPLIB_VER) - -template <typename CharT, typename Traits, typename Allocator> -struct hash<std::basic_string<CharT, Traits, Allocator>> { -public: - std::size_t operator()( const std::basic_string<CharT, Traits, Allocator>& s ) const { - std::size_t h = 0; - for ( const CharT* c = s.c_str(); *c; ++c ) { - h = h * hash_multiplier ^ char_hash(*c); - } - return h; - } - -private: - static constexpr std::size_t hash_multiplier = tbb::detail::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value; - - std::hash<CharT> char_hash; -}; // struct hash<std::basic_string> - -#endif // !(_LIBCPP_VERSION || _CPPLIB_VER) - -} // namespace std - -#endif // TBB_DEFINE_STD_HASH_SPECIALIZATIONS - -#endif // __TBB_detail__hash_compare_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__hash_compare_H +#define __TBB_detail__hash_compare_H + +#include <functional> + +#include "_containers_helpers.h" + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename Hash, typename KeyEqual> +class hash_compare { + using is_transparent_hash = has_transparent_key_equal<Key, Hash, KeyEqual>; +public: + using hasher = Hash; + using key_equal = typename is_transparent_hash::type; + + hash_compare() = default; + hash_compare( hasher hash, key_equal equal ) : my_hasher(hash), my_equal(equal) {} + + std::size_t operator()( const Key& key ) const { + return std::size_t(my_hasher(key)); + } + + bool operator()( const Key& key1, const Key& key2 ) const { + return my_equal(key1, key2); + } + + template <typename K, typename = typename std::enable_if<is_transparent_hash::value, K>::type> + std::size_t operator()( const K& key ) const { + return std::size_t(my_hasher(key)); + } + + template <typename K1, typename K2, typename = typename std::enable_if<is_transparent_hash::value, K1>::type> + bool operator()( const K1& key1, const K2& key2 ) const { + return my_equal(key1, key2); + } + + hasher hash_function() const { + return my_hasher; + } + + key_equal key_eq() const { + return my_equal; + } + + +private: + hasher my_hasher; + key_equal my_equal; +}; // class hash_compare + +//! hash_compare that is default argument for concurrent_hash_map +template <typename Key> +class tbb_hash_compare { +public: + std::size_t hash( const Key& a ) const { return my_hash_func(a); } + bool equal( const Key& a, const Key& b ) const { return my_key_equal(a, b); } +private: + std::hash<Key> my_hash_func; + std::equal_to<Key> my_key_equal; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if TBB_DEFINE_STD_HASH_SPECIALIZATIONS + +namespace std { + +template <typename T, typename U> +struct hash<std::pair<T, U>> { +public: + std::size_t operator()( const std::pair<T, U>& p ) const { + return first_hash(p.first) ^ second_hash(p.second); + } + +private: + std::hash<T> first_hash; + std::hash<U> second_hash; +}; // struct hash<std::pair> + +// Apple clang and MSVC defines their own specializations for std::hash<std::basic_string<T, Traits, Alloc>> +#if !(_LIBCPP_VERSION) && !(_CPPLIB_VER) + +template <typename CharT, typename Traits, typename Allocator> +struct hash<std::basic_string<CharT, Traits, Allocator>> { +public: + std::size_t operator()( const std::basic_string<CharT, Traits, Allocator>& s ) const { + std::size_t h = 0; + for ( const CharT* c = s.c_str(); *c; ++c ) { + h = h * hash_multiplier ^ char_hash(*c); + } + return h; + } + +private: + static constexpr std::size_t hash_multiplier = tbb::detail::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value; + + std::hash<CharT> char_hash; +}; // struct hash<std::basic_string> + +#endif // !(_LIBCPP_VERSION || _CPPLIB_VER) + +} // namespace std + +#endif // TBB_DEFINE_STD_HASH_SPECIALIZATIONS + +#endif // __TBB_detail__hash_compare_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h index 3270da786a..c4aad58dfc 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h @@ -1,366 +1,366 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__machine_H -#define __TBB_detail__machine_H - -#include "_config.h" -#include "_assert.h" - -#include <atomic> -#include <climits> -#include <cstdint> -#include <cstddef> - -#ifdef _MSC_VER -#include <intrin.h> -#pragma intrinsic(__rdtsc) -#endif -#if __TBB_x86_64 || __TBB_x86_32 -#include <immintrin.h> // _mm_pause -#endif -#if (_WIN32 || _WIN64) -#include <float.h> // _control87 -#endif - -#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN -#include <sched.h> // sched_yield -#else -#include <thread> // std::this_thread::yield() -#endif - -namespace tbb { -namespace detail { -inline namespace d0 { - -//-------------------------------------------------------------------------------------------------- -// Yield implementation -//-------------------------------------------------------------------------------------------------- - -#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN -static inline void yield() { - int err = sched_yield(); - __TBB_ASSERT_EX(err == 0, "sched_yiled has failed"); -} -#else -using std::this_thread::yield; -#endif - -//-------------------------------------------------------------------------------------------------- -// atomic_fence implementation -//-------------------------------------------------------------------------------------------------- - -#if (_WIN32 || _WIN64) -#pragma intrinsic(_mm_mfence) -#endif - -static inline void atomic_fence(std::memory_order order) { -#if (_WIN32 || _WIN64) - if (order == std::memory_order_seq_cst || - order == std::memory_order_acq_rel || - order == std::memory_order_acquire || - order == std::memory_order_release ) - { - _mm_mfence(); - return; - } -#endif /*(_WIN32 || _WIN64)*/ - std::atomic_thread_fence(order); -} - -//-------------------------------------------------------------------------------------------------- -// Pause implementation -//-------------------------------------------------------------------------------------------------- - -static inline void machine_pause(int32_t delay) { -#if __TBB_x86_64 || __TBB_x86_32 - while (delay-- > 0) { _mm_pause(); } -#elif __ARM_ARCH_7A__ || __aarch64__ - while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); } -#else /* Generic */ - (void)delay; // suppress without including _template_helpers.h - yield(); -#endif -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// tbb::detail::log2() implementation -//////////////////////////////////////////////////////////////////////////////////////////////////// -// TODO: Use log2p1() function that will be available in C++20 standard - -#if defined(__GNUC__) || defined(__clang__) -namespace gnu_builtins { - inline uintptr_t clz(unsigned int x) { return __builtin_clz(x); } - inline uintptr_t clz(unsigned long int x) { return __builtin_clzl(x); } - inline uintptr_t clz(unsigned long long int x) { return __builtin_clzll(x); } -} -#elif defined(_MSC_VER) -#pragma intrinsic(__TBB_W(_BitScanReverse)) -namespace msvc_intrinsics { - static inline uintptr_t bit_scan_reverse(uintptr_t i) { - unsigned long j; - __TBB_W(_BitScanReverse)( &j, i ); - return j; - } -} 
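The machine_log2 code in this hunk leans on the identity that, for x > 0 and a power-of-two word width, (bits-1) - clz(x) equals (bits-1) XOR clz(x), since clz(x) never exceeds bits-1. A small GCC/Clang-only check of that identity (illustrative, not part of the patch; fixed to 64-bit for clarity):

    #include <cassert>
    #include <cstdint>

    // Index of the most significant set bit, computed the way machine_log2 does it
    // on GCC/Clang: (bits - 1) ^ clz(x). Requires x != 0.
    static unsigned log2_via_clz(std::uint64_t x) {
        return 63u ^ static_cast<unsigned>(__builtin_clzll(x));
    }

    int main() {
        for (std::uint64_t x = 1; x < 100000; ++x) {
            unsigned reference = 0;
            for (std::uint64_t t = x; t >>= 1; ) ++reference;   // naive MSB index
            assert(log2_via_clz(x) == reference);
        }
        return 0;
    }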
-#endif - -template <typename T> -constexpr std::uintptr_t number_of_bits() { - return sizeof(T) * CHAR_BIT; -} - -// logarithm is the index of the most significant non-zero bit -static inline uintptr_t machine_log2(uintptr_t x) { -#if defined(__GNUC__) || defined(__clang__) - // If P is a power of 2 and x<P, then (P-1)-x == (P-1) XOR x - return (number_of_bits<decltype(x)>() - 1) ^ gnu_builtins::clz(x); -#elif defined(_MSC_VER) - return msvc_intrinsics::bit_scan_reverse(x); -#elif __i386__ || __i386 /*for Sun OS*/ || __MINGW32__ - uintptr_t j, i = x; - __asm__("bsr %1,%0" : "=r"(j) : "r"(i)); - return j; -#elif __powerpc__ || __POWERPC__ - #if __TBB_WORDSIZE==8 - __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); - return 63 - static_cast<intptr_t>(x); - #else - __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); - return 31 - static_cast<intptr_t>(x); - #endif /*__TBB_WORDSIZE*/ -#elif __sparc - uint64_t count; - // one hot encode - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - x |= (x >> 32); - // count 1's - __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); - return count - 1; -#else - intptr_t result = 0; - - if( sizeof(x) > 4 && (uintptr_t tmp = x >> 32) ) { x = tmp; result += 32; } - if( uintptr_t tmp = x >> 16 ) { x = tmp; result += 16; } - if( uintptr_t tmp = x >> 8 ) { x = tmp; result += 8; } - if( uintptr_t tmp = x >> 4 ) { x = tmp; result += 4; } - if( uintptr_t tmp = x >> 2 ) { x = tmp; result += 2; } - - return (x & 2) ? result + 1 : result; -#endif -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// tbb::detail::reverse_bits() implementation -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if TBB_USE_CLANG_BITREVERSE_BUILTINS -namespace llvm_builtins { - inline uint8_t builtin_bitreverse(uint8_t x) { return __builtin_bitreverse8 (x); } - inline uint16_t builtin_bitreverse(uint16_t x) { return __builtin_bitreverse16(x); } - inline uint32_t builtin_bitreverse(uint32_t x) { return __builtin_bitreverse32(x); } - inline uint64_t builtin_bitreverse(uint64_t x) { return __builtin_bitreverse64(x); } -} -#else // generic -template<typename T> -struct reverse { - static const T byte_table[256]; -}; - -template<typename T> -const T reverse<T>::byte_table[256] = { - 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, - 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, - 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, - 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, - 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, - 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, - 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, - 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, - 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, - 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, - 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, - 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, - 0x03, 0x83, 0x43, 
0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, - 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, - 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, - 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF -}; - -inline unsigned char reverse_byte(unsigned char src) { - return reverse<unsigned char>::byte_table[src]; -} -#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS - -template<typename T> -T machine_reverse_bits(T src) { -#if TBB_USE_CLANG_BITREVERSE_BUILTINS - return builtin_bitreverse(fixed_width_cast(src)); -#else /* Generic */ - T dst; - unsigned char *original = (unsigned char *) &src; - unsigned char *reversed = (unsigned char *) &dst; - - for ( int i = sizeof(T) - 1; i >= 0; i-- ) { - reversed[i] = reverse_byte( original[sizeof(T) - i - 1] ); - } - - return dst; -#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS -} - -} // inline namespace d0 - -namespace d1 { - -#if (_WIN32 || _WIN64) -// API to retrieve/update FPU control setting -#define __TBB_CPU_CTL_ENV_PRESENT 1 -struct cpu_ctl_env { - unsigned int x87cw{}; -#if (__TBB_x86_64) - // Changing the infinity mode or the floating-point precision is not supported on x64. - // The attempt causes an assertion. See - // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2 - static constexpr unsigned int X87CW_CONTROL_MASK = _MCW_DN | _MCW_EM | _MCW_RC; -#else - static constexpr unsigned int X87CW_CONTROL_MASK = ~0U; -#endif -#if (__TBB_x86_32 || __TBB_x86_64) - unsigned int mxcsr{}; - static constexpr unsigned int MXCSR_CONTROL_MASK = ~0x3fu; /* all except last six status bits */ -#endif - - bool operator!=( const cpu_ctl_env& ctl ) const { - return -#if (__TBB_x86_32 || __TBB_x86_64) - mxcsr != ctl.mxcsr || -#endif - x87cw != ctl.x87cw; - } - void get_env() { - x87cw = _control87(0, 0); -#if (__TBB_x86_32 || __TBB_x86_64) - mxcsr = _mm_getcsr(); -#endif - } - void set_env() const { - _control87(x87cw, X87CW_CONTROL_MASK); -#if (__TBB_x86_32 || __TBB_x86_64) - _mm_setcsr(mxcsr & MXCSR_CONTROL_MASK); -#endif - } -}; -#elif (__TBB_x86_32 || __TBB_x86_64) -// API to retrieve/update FPU control setting -#define __TBB_CPU_CTL_ENV_PRESENT 1 -struct cpu_ctl_env { - int mxcsr{}; - short x87cw{}; - static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */ - - bool operator!=(const cpu_ctl_env& ctl) const { - return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; - } - void get_env() { - __asm__ __volatile__( - "stmxcsr %0\n\t" - "fstcw %1" - : "=m"(mxcsr), "=m"(x87cw) - ); - mxcsr &= MXCSR_CONTROL_MASK; - } - void set_env() const { - __asm__ __volatile__( - "ldmxcsr %0\n\t" - "fldcw %1" - : : "m"(mxcsr), "m"(x87cw) - ); - } -}; -#endif - -} // namespace d1 - -} // namespace detail -} // namespace tbb - -#if !__TBB_CPU_CTL_ENV_PRESENT -#include <fenv.h> - -#include <cstring> - -namespace tbb { -namespace detail { - -namespace r1 { -void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); -void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); -} // namespace r1 - -namespace d1 { - -class cpu_ctl_env { - fenv_t *my_fenv_ptr; -public: - cpu_ctl_env() : my_fenv_ptr(NULL) {} - ~cpu_ctl_env() { - if ( my_fenv_ptr ) - r1::cache_aligned_deallocate( (void*)my_fenv_ptr ); - } - // It is possible not to copy memory but just to copy pointers but the following issues should be addressed: - // 1. 
The arena lifetime and the context lifetime are independent; - // 2. The user is allowed to recapture different FPU settings to context so 'current FPU settings' inside - // dispatch loop may become invalid. - // But do we really want to improve the fenv implementation? It seems to be better to replace the fenv implementation - // with a platform specific implementation. - cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(NULL) { - *this = src; - } - cpu_ctl_env& operator=( const cpu_ctl_env &src ) { - __TBB_ASSERT( src.my_fenv_ptr, NULL ); - if ( !my_fenv_ptr ) - my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); - *my_fenv_ptr = *src.my_fenv_ptr; - return *this; - } - bool operator!=( const cpu_ctl_env &ctl ) const { - __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); - __TBB_ASSERT( ctl.my_fenv_ptr, "cpu_ctl_env is not initialized." ); - return std::memcmp( (void*)my_fenv_ptr, (void*)ctl.my_fenv_ptr, sizeof(fenv_t) ); - } - void get_env () { - if ( !my_fenv_ptr ) - my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); - fegetenv( my_fenv_ptr ); - } - const cpu_ctl_env& set_env () const { - __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); - fesetenv( my_fenv_ptr ); - return *this; - } -}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif /* !__TBB_CPU_CTL_ENV_PRESENT */ - -#endif // __TBB_detail__machine_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__machine_H +#define __TBB_detail__machine_H + +#include "_config.h" +#include "_assert.h" + +#include <atomic> +#include <climits> +#include <cstdint> +#include <cstddef> + +#ifdef _MSC_VER +#include <intrin.h> +#pragma intrinsic(__rdtsc) +#endif +#if __TBB_x86_64 || __TBB_x86_32 +#include <immintrin.h> // _mm_pause +#endif +#if (_WIN32 || _WIN64) +#include <float.h> // _control87 +#endif + +#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN +#include <sched.h> // sched_yield +#else +#include <thread> // std::this_thread::yield() +#endif + +namespace tbb { +namespace detail { +inline namespace d0 { + +//-------------------------------------------------------------------------------------------------- +// Yield implementation +//-------------------------------------------------------------------------------------------------- + +#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN +static inline void yield() { + int err = sched_yield(); + __TBB_ASSERT_EX(err == 0, "sched_yiled has failed"); +} +#else +using std::this_thread::yield; +#endif + +//-------------------------------------------------------------------------------------------------- +// atomic_fence implementation +//-------------------------------------------------------------------------------------------------- + +#if (_WIN32 || _WIN64) +#pragma intrinsic(_mm_mfence) +#endif + +static inline void atomic_fence(std::memory_order order) { +#if (_WIN32 || _WIN64) + if (order == std::memory_order_seq_cst || + order == std::memory_order_acq_rel || + order == std::memory_order_acquire || + order == std::memory_order_release ) + { + _mm_mfence(); + return; + } +#endif /*(_WIN32 || _WIN64)*/ + std::atomic_thread_fence(order); +} + +//-------------------------------------------------------------------------------------------------- +// Pause implementation +//-------------------------------------------------------------------------------------------------- + +static inline void machine_pause(int32_t delay) { +#if __TBB_x86_64 || __TBB_x86_32 + while (delay-- > 0) { _mm_pause(); } +#elif __ARM_ARCH_7A__ || __aarch64__ + while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); } +#else /* Generic */ + (void)delay; // suppress without including _template_helpers.h + yield(); +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// tbb::detail::log2() implementation +//////////////////////////////////////////////////////////////////////////////////////////////////// +// TODO: Use log2p1() function that will be available in C++20 standard + +#if defined(__GNUC__) || defined(__clang__) +namespace gnu_builtins { + inline uintptr_t clz(unsigned int x) { return __builtin_clz(x); } + inline uintptr_t clz(unsigned long int x) { return __builtin_clzl(x); } + inline uintptr_t clz(unsigned long long int x) { return __builtin_clzll(x); } +} +#elif defined(_MSC_VER) +#pragma intrinsic(__TBB_W(_BitScanReverse)) +namespace msvc_intrinsics { + static inline uintptr_t bit_scan_reverse(uintptr_t i) { + unsigned long j; + __TBB_W(_BitScanReverse)( &j, i ); + return j; + } +} +#endif + +template <typename T> +constexpr std::uintptr_t number_of_bits() { + return sizeof(T) * CHAR_BIT; +} + +// logarithm is the index of the most significant non-zero bit +static inline uintptr_t machine_log2(uintptr_t x) { +#if defined(__GNUC__) || defined(__clang__) + // If P is a power of 2 and x<P, then (P-1)-x == (P-1) XOR x + return (number_of_bits<decltype(x)>() - 1) ^ gnu_builtins::clz(x); 
+#elif defined(_MSC_VER) + return msvc_intrinsics::bit_scan_reverse(x); +#elif __i386__ || __i386 /*for Sun OS*/ || __MINGW32__ + uintptr_t j, i = x; + __asm__("bsr %1,%0" : "=r"(j) : "r"(i)); + return j; +#elif __powerpc__ || __POWERPC__ + #if __TBB_WORDSIZE==8 + __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); + return 63 - static_cast<intptr_t>(x); + #else + __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); + return 31 - static_cast<intptr_t>(x); + #endif /*__TBB_WORDSIZE*/ +#elif __sparc + uint64_t count; + // one hot encode + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + // count 1's + __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); + return count - 1; +#else + intptr_t result = 0; + + if( sizeof(x) > 4 && (uintptr_t tmp = x >> 32) ) { x = tmp; result += 32; } + if( uintptr_t tmp = x >> 16 ) { x = tmp; result += 16; } + if( uintptr_t tmp = x >> 8 ) { x = tmp; result += 8; } + if( uintptr_t tmp = x >> 4 ) { x = tmp; result += 4; } + if( uintptr_t tmp = x >> 2 ) { x = tmp; result += 2; } + + return (x & 2) ? result + 1 : result; +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// tbb::detail::reverse_bits() implementation +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if TBB_USE_CLANG_BITREVERSE_BUILTINS +namespace llvm_builtins { + inline uint8_t builtin_bitreverse(uint8_t x) { return __builtin_bitreverse8 (x); } + inline uint16_t builtin_bitreverse(uint16_t x) { return __builtin_bitreverse16(x); } + inline uint32_t builtin_bitreverse(uint32_t x) { return __builtin_bitreverse32(x); } + inline uint64_t builtin_bitreverse(uint64_t x) { return __builtin_bitreverse64(x); } +} +#else // generic +template<typename T> +struct reverse { + static const T byte_table[256]; +}; + +template<typename T> +const T reverse<T>::byte_table[256] = { + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, + 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, + 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, + 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, + 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, + 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, + 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, + 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, + 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, + 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, + 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, + 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, + 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, + 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, + 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, + 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; + +inline unsigned char 
reverse_byte(unsigned char src) { + return reverse<unsigned char>::byte_table[src]; +} +#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS + +template<typename T> +T machine_reverse_bits(T src) { +#if TBB_USE_CLANG_BITREVERSE_BUILTINS + return builtin_bitreverse(fixed_width_cast(src)); +#else /* Generic */ + T dst; + unsigned char *original = (unsigned char *) &src; + unsigned char *reversed = (unsigned char *) &dst; + + for ( int i = sizeof(T) - 1; i >= 0; i-- ) { + reversed[i] = reverse_byte( original[sizeof(T) - i - 1] ); + } + + return dst; +#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS +} + +} // inline namespace d0 + +namespace d1 { + +#if (_WIN32 || _WIN64) +// API to retrieve/update FPU control setting +#define __TBB_CPU_CTL_ENV_PRESENT 1 +struct cpu_ctl_env { + unsigned int x87cw{}; +#if (__TBB_x86_64) + // Changing the infinity mode or the floating-point precision is not supported on x64. + // The attempt causes an assertion. See + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2 + static constexpr unsigned int X87CW_CONTROL_MASK = _MCW_DN | _MCW_EM | _MCW_RC; +#else + static constexpr unsigned int X87CW_CONTROL_MASK = ~0U; +#endif +#if (__TBB_x86_32 || __TBB_x86_64) + unsigned int mxcsr{}; + static constexpr unsigned int MXCSR_CONTROL_MASK = ~0x3fu; /* all except last six status bits */ +#endif + + bool operator!=( const cpu_ctl_env& ctl ) const { + return +#if (__TBB_x86_32 || __TBB_x86_64) + mxcsr != ctl.mxcsr || +#endif + x87cw != ctl.x87cw; + } + void get_env() { + x87cw = _control87(0, 0); +#if (__TBB_x86_32 || __TBB_x86_64) + mxcsr = _mm_getcsr(); +#endif + } + void set_env() const { + _control87(x87cw, X87CW_CONTROL_MASK); +#if (__TBB_x86_32 || __TBB_x86_64) + _mm_setcsr(mxcsr & MXCSR_CONTROL_MASK); +#endif + } +}; +#elif (__TBB_x86_32 || __TBB_x86_64) +// API to retrieve/update FPU control setting +#define __TBB_CPU_CTL_ENV_PRESENT 1 +struct cpu_ctl_env { + int mxcsr{}; + short x87cw{}; + static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */ + + bool operator!=(const cpu_ctl_env& ctl) const { + return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; + } + void get_env() { + __asm__ __volatile__( + "stmxcsr %0\n\t" + "fstcw %1" + : "=m"(mxcsr), "=m"(x87cw) + ); + mxcsr &= MXCSR_CONTROL_MASK; + } + void set_env() const { + __asm__ __volatile__( + "ldmxcsr %0\n\t" + "fldcw %1" + : : "m"(mxcsr), "m"(x87cw) + ); + } +}; +#endif + +} // namespace d1 + +} // namespace detail +} // namespace tbb + +#if !__TBB_CPU_CTL_ENV_PRESENT +#include <fenv.h> + +#include <cstring> + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); +} // namespace r1 + +namespace d1 { + +class cpu_ctl_env { + fenv_t *my_fenv_ptr; +public: + cpu_ctl_env() : my_fenv_ptr(NULL) {} + ~cpu_ctl_env() { + if ( my_fenv_ptr ) + r1::cache_aligned_deallocate( (void*)my_fenv_ptr ); + } + // It is possible not to copy memory but just to copy pointers but the following issues should be addressed: + // 1. The arena lifetime and the context lifetime are independent; + // 2. The user is allowed to recapture different FPU settings to context so 'current FPU settings' inside + // dispatch loop may become invalid. + // But do we really want to improve the fenv implementation? It seems to be better to replace the fenv implementation + // with a platform specific implementation. 
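The cpu_ctl_env variants above (x87/MXCSR on Windows and x86, fenv_t elsewhere) all support the same capture/compare/restore pattern used to propagate FPU settings. The same pattern expressed with the standard <cfenv> API that the fallback wraps; this is an illustration, not TBB code, and the rounding-mode change is just an example perturbation:

    #include <cfenv>

    int main() {
        std::fenv_t saved;
        std::fegetenv(&saved);                    // capture, as cpu_ctl_env::get_env() does
        const int original_rounding = std::fegetround();

        std::fesetround(FE_UPWARD);               // some work perturbs the FP control state

        std::fesetenv(&saved);                    // restore, as cpu_ctl_env::set_env() does
        return std::fegetround() == original_rounding ? 0 : 1;
    }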
+ cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(NULL) { + *this = src; + } + cpu_ctl_env& operator=( const cpu_ctl_env &src ) { + __TBB_ASSERT( src.my_fenv_ptr, NULL ); + if ( !my_fenv_ptr ) + my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); + *my_fenv_ptr = *src.my_fenv_ptr; + return *this; + } + bool operator!=( const cpu_ctl_env &ctl ) const { + __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); + __TBB_ASSERT( ctl.my_fenv_ptr, "cpu_ctl_env is not initialized." ); + return std::memcmp( (void*)my_fenv_ptr, (void*)ctl.my_fenv_ptr, sizeof(fenv_t) ); + } + void get_env () { + if ( !my_fenv_ptr ) + my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); + fegetenv( my_fenv_ptr ); + } + const cpu_ctl_env& set_env () const { + __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); + fesetenv( my_fenv_ptr ); + return *this; + } +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* !__TBB_CPU_CTL_ENV_PRESENT */ + +#endif // __TBB_detail__machine_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h index 2e1df30931..325af0a680 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h @@ -1,24 +1,24 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// All public entities of the OneAPI Spec are available under oneapi namespace - -// Define tbb namespace first as it might not be known yet -namespace tbb {} - -namespace oneapi { -namespace tbb = ::tbb; -} +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// All public entities of the OneAPI Spec are available under oneapi namespace + +// Define tbb namespace first as it might not be known yet +namespace tbb {} + +namespace oneapi { +namespace tbb = ::tbb; +} diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h index 265be07555..d669c1f721 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h @@ -1,162 +1,162 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__node_handle_H -#define __TBB_detail__node_handle_H - -#include "_allocator_traits.h" -#include "_assert.h" - -namespace tbb { -namespace detail { -namespace d1 { - -// A structure to access private node handle methods in internal TBB classes -// Regular friend declaration is not convenient because classes which use node handle -// can be placed in the different versioning namespaces. -struct node_handle_accessor { - template <typename NodeHandleType> - static typename NodeHandleType::node* get_node_ptr( NodeHandleType& nh ) { - return nh.get_node_ptr(); - } - - template <typename NodeHandleType> - static NodeHandleType construct( typename NodeHandleType::node* node_ptr ) { - return NodeHandleType{node_ptr}; - } - - template <typename NodeHandleType> - static void deactivate( NodeHandleType& nh ) { - nh.deactivate(); - } -}; // struct node_handle_accessor - -template<typename Value, typename Node, typename Allocator> -class node_handle_base { -public: - using allocator_type = Allocator; -protected: - using node = Node; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; -public: - - node_handle_base() : my_node(nullptr), my_allocator() {} - node_handle_base(node_handle_base&& nh) : my_node(nh.my_node), - my_allocator(std::move(nh.my_allocator)) { - nh.my_node = nullptr; - } - - __TBB_nodiscard bool empty() const { return my_node == nullptr; } - explicit operator bool() const { return my_node != nullptr; } - - ~node_handle_base() { internal_destroy(); } - - node_handle_base& operator=( node_handle_base&& nh ) { - internal_destroy(); - my_node = nh.my_node; - move_assign_allocators(my_allocator, nh.my_allocator); - nh.deactivate(); - return *this; - } - - void swap( node_handle_base& nh ) { - using std::swap; - swap(my_node, nh.my_node); - swap_allocators(my_allocator, nh.my_allocator); - } - - allocator_type get_allocator() const { - return my_allocator; - } - -protected: - node_handle_base( node* n ) : my_node(n) {} - - void internal_destroy() { - if(my_node != nullptr) { - allocator_traits_type::destroy(my_allocator, my_node->storage()); - typename allocator_traits_type::template rebind_alloc<node> node_allocator(my_allocator); - node_allocator.deallocate(my_node, 1); - } - } - - node* get_node_ptr() { return my_node; } - - void deactivate() { my_node = nullptr; } - - node* my_node; - allocator_type my_allocator; -}; - -// node handle for maps -template<typename Key, typename Value, typename Node, typename Allocator> -class node_handle : public node_handle_base<Value, Node, Allocator> { - using base_type = node_handle_base<Value, Node, Allocator>; -public: - using key_type = Key; - using mapped_type = typename Value::second_type; - using allocator_type = typename base_type::allocator_type; - - node_handle() = default; - - key_type& key() const { - __TBB_ASSERT(!this->empty(), "Cannot get key from the empty node_type object"); - return *const_cast<key_type*>(&(this->my_node->value().first)); - } - - mapped_type& mapped() const { - __TBB_ASSERT(!this->empty(), "Cannot get mapped value from the empty node_type object"); - return 
this->my_node->value().second; - } - -private: - friend struct node_handle_accessor; - - node_handle( typename base_type::node* n ) : base_type(n) {} -}; // class node_handle - -// node handle for sets -template<typename Key, typename Node, typename Allocator> -class node_handle<Key, Key, Node, Allocator> : public node_handle_base<Key, Node, Allocator> { - using base_type = node_handle_base<Key, Node, Allocator>; -public: - using value_type = Key; - using allocator_type = typename base_type::allocator_type; - - node_handle() = default; - - value_type& value() const { - __TBB_ASSERT(!this->empty(), "Cannot get value from the empty node_type object"); - return *const_cast<value_type*>(&(this->my_node->value())); - } - -private: - friend struct node_handle_accessor; - - node_handle( typename base_type::node* n ) : base_type(n) {} -}; // class node_handle - -template <typename Key, typename Value, typename Node, typename Allocator> -void swap( node_handle<Key, Value, Node, Allocator>& lhs, - node_handle<Key, Value, Node, Allocator>& rhs ) { - return lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__node_handle_H +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__node_handle_H +#define __TBB_detail__node_handle_H + +#include "_allocator_traits.h" +#include "_assert.h" + +namespace tbb { +namespace detail { +namespace d1 { + +// A structure to access private node handle methods in internal TBB classes +// Regular friend declaration is not convenient because classes which use node handle +// can be placed in the different versioning namespaces. 
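For context on the node_handle classes in this hunk: they back the node_type of the concurrent associative containers, which expose them through unsafe_extract() and a node-accepting insert(), mirroring C++17 node handles. A minimal sketch, not part of the patch, assuming oneTBB's concurrent_unordered_map interface:

    #include <string>
    #include <utility>
    #include <oneapi/tbb/concurrent_unordered_map.h>

    int main() {
        using Map = tbb::concurrent_unordered_map<int, std::string>;
        Map source{{1, "one"}, {2, "two"}};
        Map target;

        Map::node_type nh = source.unsafe_extract(1);  // take ownership of the node
        if (!nh.empty()) {
            nh.mapped() += "!";                        // key()/mapped() as defined above
            target.insert(std::move(nh));              // re-link the node, no element copy
        }
        return target.count(1) == 1 ? 0 : 1;
    }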
+struct node_handle_accessor { + template <typename NodeHandleType> + static typename NodeHandleType::node* get_node_ptr( NodeHandleType& nh ) { + return nh.get_node_ptr(); + } + + template <typename NodeHandleType> + static NodeHandleType construct( typename NodeHandleType::node* node_ptr ) { + return NodeHandleType{node_ptr}; + } + + template <typename NodeHandleType> + static void deactivate( NodeHandleType& nh ) { + nh.deactivate(); + } +}; // struct node_handle_accessor + +template<typename Value, typename Node, typename Allocator> +class node_handle_base { +public: + using allocator_type = Allocator; +protected: + using node = Node; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; +public: + + node_handle_base() : my_node(nullptr), my_allocator() {} + node_handle_base(node_handle_base&& nh) : my_node(nh.my_node), + my_allocator(std::move(nh.my_allocator)) { + nh.my_node = nullptr; + } + + __TBB_nodiscard bool empty() const { return my_node == nullptr; } + explicit operator bool() const { return my_node != nullptr; } + + ~node_handle_base() { internal_destroy(); } + + node_handle_base& operator=( node_handle_base&& nh ) { + internal_destroy(); + my_node = nh.my_node; + move_assign_allocators(my_allocator, nh.my_allocator); + nh.deactivate(); + return *this; + } + + void swap( node_handle_base& nh ) { + using std::swap; + swap(my_node, nh.my_node); + swap_allocators(my_allocator, nh.my_allocator); + } + + allocator_type get_allocator() const { + return my_allocator; + } + +protected: + node_handle_base( node* n ) : my_node(n) {} + + void internal_destroy() { + if(my_node != nullptr) { + allocator_traits_type::destroy(my_allocator, my_node->storage()); + typename allocator_traits_type::template rebind_alloc<node> node_allocator(my_allocator); + node_allocator.deallocate(my_node, 1); + } + } + + node* get_node_ptr() { return my_node; } + + void deactivate() { my_node = nullptr; } + + node* my_node; + allocator_type my_allocator; +}; + +// node handle for maps +template<typename Key, typename Value, typename Node, typename Allocator> +class node_handle : public node_handle_base<Value, Node, Allocator> { + using base_type = node_handle_base<Value, Node, Allocator>; +public: + using key_type = Key; + using mapped_type = typename Value::second_type; + using allocator_type = typename base_type::allocator_type; + + node_handle() = default; + + key_type& key() const { + __TBB_ASSERT(!this->empty(), "Cannot get key from the empty node_type object"); + return *const_cast<key_type*>(&(this->my_node->value().first)); + } + + mapped_type& mapped() const { + __TBB_ASSERT(!this->empty(), "Cannot get mapped value from the empty node_type object"); + return this->my_node->value().second; + } + +private: + friend struct node_handle_accessor; + + node_handle( typename base_type::node* n ) : base_type(n) {} +}; // class node_handle + +// node handle for sets +template<typename Key, typename Node, typename Allocator> +class node_handle<Key, Key, Node, Allocator> : public node_handle_base<Key, Node, Allocator> { + using base_type = node_handle_base<Key, Node, Allocator>; +public: + using value_type = Key; + using allocator_type = typename base_type::allocator_type; + + node_handle() = default; + + value_type& value() const { + __TBB_ASSERT(!this->empty(), "Cannot get value from the empty node_type object"); + return *const_cast<value_type*>(&(this->my_node->value())); + } + +private: + friend struct node_handle_accessor; + + node_handle( typename base_type::node* n ) : 
base_type(n) {} +}; // class node_handle + +template <typename Key, typename Value, typename Node, typename Allocator> +void swap( node_handle<Key, Value, Node, Allocator>& lhs, + node_handle<Key, Value, Node, Allocator>& rhs ) { + return lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__node_handle_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h index 95a4d3dc96..a1ce306c14 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h @@ -1,453 +1,453 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_filters_H -#define __TBB_parallel_filters_H - -#include "_config.h" -#include "_task.h" -#include "_pipeline_filters_deduction.h" -#include "../tbb_allocator.h" - -#include <cstddef> -#include <cstdint> - -namespace tbb { -namespace detail { - -namespace d1 { -class base_filter; -} - -namespace r1 { -void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); -class pipeline; -class stage_task; -class input_buffer; -} - -namespace d1 { -class filter_node; - -//! A stage in a pipeline. -/** @ingroup algorithms */ -class base_filter{ -private: - //! Value used to mark "not in pipeline" - static base_filter* not_in_pipeline() { return reinterpret_cast<base_filter*>(std::intptr_t(-1)); } -public: - //! The lowest bit 0 is for parallel vs serial - static constexpr unsigned int filter_is_serial = 0x1; - - //! 2nd bit distinguishes ordered vs unordered filters. - static constexpr unsigned int filter_is_out_of_order = 0x1<<1; - - //! 3rd bit marks input filters emitting small objects - static constexpr unsigned int filter_may_emit_null = 0x1<<2; - - base_filter(const base_filter&) = delete; - base_filter& operator=(const base_filter&) = delete; - -protected: - explicit base_filter( unsigned int m ) : - next_filter_in_pipeline(not_in_pipeline()), - my_input_buffer(nullptr), - my_filter_mode(m), - my_pipeline(nullptr) - {} - - // signal end-of-input for concrete_filters - void set_end_of_input() { - r1::set_end_of_input(*this); - } - -public: - //! True if filter is serial. - bool is_serial() const { - return bool( my_filter_mode & filter_is_serial ); - } - - //! True if filter must receive stream in order. - bool is_ordered() const { - return (my_filter_mode & filter_is_serial) && !(my_filter_mode & filter_is_out_of_order); - } - - //! true if an input filter can emit null - bool object_may_be_null() { - return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null; - } - - //! Operate on an item from the input stream, and return item for output stream. - /** Returns nullptr if filter is a sink. */ - virtual void* operator()( void* item ) = 0; - - //! Destroy filter. - virtual ~base_filter() {}; - - //! Destroys item if pipeline was cancelled. - /** Required to prevent memory leaks. 
- Note it can be called concurrently even for serial filters.*/ - virtual void finalize( void* /*item*/ ) {} - -private: - //! Pointer to next filter in the pipeline. - base_filter* next_filter_in_pipeline; - - //! Buffer for incoming tokens, or nullptr if not required. - /** The buffer is required if the filter is serial. */ - r1::input_buffer* my_input_buffer; - - friend class r1::stage_task; - friend class r1::pipeline; - friend void r1::set_end_of_input(d1::base_filter&); - - //! Storage for filter mode and dynamically checked implementation version. - const unsigned int my_filter_mode; - - //! Pointer to the pipeline. - r1::pipeline* my_pipeline; -}; - -template<typename Body, typename InputType, typename OutputType > -class concrete_filter; - -//! input_filter control to signal end-of-input for parallel_pipeline -class flow_control { - bool is_pipeline_stopped = false; - flow_control() = default; - template<typename Body, typename InputType, typename OutputType > friend class concrete_filter; - template<typename Output> friend class input_node; -public: - void stop() { is_pipeline_stopped = true; } -}; - -// Emulate std::is_trivially_copyable (false positives not allowed, false negatives suboptimal but safe). -#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT -template<typename T> using tbb_trivially_copyable = std::is_trivially_copyable<T>; -#else -template<typename T> struct tbb_trivially_copyable { enum { value = false }; }; -template<typename T> struct tbb_trivially_copyable < T* > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < bool > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < char > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < signed char > { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned char > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < short > { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned short > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < int > { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned int > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < long > { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned long > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < long long> { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned long long> { enum { value = true }; }; -template<> struct tbb_trivially_copyable < float > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < double > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < long double > { enum { value = true }; }; -#endif // __TBB_CPP11_TYPE_PROPERTIES_PRESENT - -template<typename T> -struct use_allocator { - static constexpr bool value = sizeof(T) > sizeof(void *) || !tbb_trivially_copyable<T>::value; -}; - -// A helper class to customize how a type is passed between filters. 
-// Usage: token_helper<T, use_allocator<T>::value> -template<typename T, bool Allocate> struct token_helper; - -// using tbb_allocator -template<typename T> -struct token_helper<T, true> { - using pointer = T*; - using value_type = T; - static pointer create_token(value_type && source) { - return new (r1::allocate_memory(sizeof(T))) T(std::move(source)); - } - static value_type & token(pointer & t) { return *t; } - static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } - static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } - static void destroy_token(pointer token) { - token->~value_type(); - r1::deallocate_memory(token); - } -}; - -// pointer specialization -template<typename T> -struct token_helper<T*, false> { - using pointer = T*; - using value_type = T*; - static pointer create_token(const value_type & source) { return source; } - static value_type & token(pointer & t) { return t; } - static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } - static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } - static void destroy_token( pointer /*token*/) {} -}; - -// converting type to and from void*, passing objects directly -template<typename T> -struct token_helper<T, false> { - typedef union { - T actual_value; - void * void_overlay; - } type_to_void_ptr_map; - using pointer = T; // not really a pointer in this case. - using value_type = T; - static pointer create_token(const value_type & source) { return source; } - static value_type & token(pointer & t) { return t; } - static void * cast_to_void_ptr(pointer ref) { - type_to_void_ptr_map mymap; - mymap.void_overlay = nullptr; - mymap.actual_value = ref; - return mymap.void_overlay; - } - static pointer cast_from_void_ptr(void * ref) { - type_to_void_ptr_map mymap; - mymap.void_overlay = ref; - return mymap.actual_value; - } - static void destroy_token( pointer /*token*/) {} -}; - -// intermediate -template<typename InputType, typename OutputType, typename Body> -class concrete_filter: public base_filter { - const Body& my_body; - using input_helper = token_helper<InputType, use_allocator<InputType >::value>; - using input_pointer = typename input_helper::pointer; - using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; - using output_pointer = typename output_helper::pointer; - - void* operator()(void* input) override { - input_pointer temp_input = input_helper::cast_from_void_ptr(input); - output_pointer temp_output = output_helper::create_token(my_body(std::move(input_helper::token(temp_input)))); - input_helper::destroy_token(temp_input); - return output_helper::cast_to_void_ptr(temp_output); - } - - void finalize(void * input) override { - input_pointer temp_input = input_helper::cast_from_void_ptr(input); - input_helper::destroy_token(temp_input); - } - -public: - concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} -}; - -// input -template<typename OutputType, typename Body> -class concrete_filter<void, OutputType, Body>: public base_filter { - const Body& my_body; - using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; - using output_pointer = typename output_helper::pointer; - - void* operator()(void*) override { - flow_control control; - output_pointer temp_output = output_helper::create_token(my_body(control)); - if(control.is_pipeline_stopped) { - output_helper::destroy_token(temp_output); - set_end_of_input(); - 
return nullptr; - } - return output_helper::cast_to_void_ptr(temp_output); - } - -public: - concrete_filter(unsigned int m, const Body& body) : - base_filter(m | filter_may_emit_null), - my_body(body) - {} -}; - -// output -template<typename InputType, typename Body> -class concrete_filter<InputType, void, Body>: public base_filter { - const Body& my_body; - using input_helper = token_helper<InputType, use_allocator<InputType >::value>; - using input_pointer = typename input_helper::pointer; - - void* operator()(void* input) override { - input_pointer temp_input = input_helper::cast_from_void_ptr(input); - my_body(std::move(input_helper::token(temp_input))); - input_helper::destroy_token(temp_input); - return nullptr; - } - void finalize(void* input) override { - input_pointer temp_input = input_helper::cast_from_void_ptr(input); - input_helper::destroy_token(temp_input); - } - -public: - concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} -}; - -template<typename Body> -class concrete_filter<void, void, Body>: public base_filter { - const Body& my_body; - - void* operator()(void*) override { - flow_control control; - my_body(control); - void* output = control.is_pipeline_stopped ? nullptr : (void*)(std::intptr_t)-1; - return output; - } -public: - concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} -}; - -class filter_node_ptr { - filter_node * my_node; - -public: - filter_node_ptr() : my_node(nullptr) {} - filter_node_ptr(filter_node *); - ~filter_node_ptr(); - filter_node_ptr(const filter_node_ptr &); - filter_node_ptr(filter_node_ptr &&); - void operator=(filter_node *); - void operator=(const filter_node_ptr &); - void operator=(filter_node_ptr &&); - filter_node& operator*() const; - operator bool() const; -}; - -//! Abstract base class that represents a node in a parse tree underlying a filter class. -/** These nodes are always heap-allocated and can be shared by filter objects. */ -class filter_node { - /** Count must be atomic because it is hidden state for user, but might be shared by threads. */ - std::atomic<std::intptr_t> ref_count; -public: - filter_node_ptr left; - filter_node_ptr right; -protected: - filter_node() : ref_count(0), left(nullptr), right(nullptr) { -#ifdef __TBB_TEST_FILTER_NODE_COUNT - ++(__TBB_TEST_FILTER_NODE_COUNT); -#endif - } -public: - filter_node(const filter_node_ptr& x, const filter_node_ptr& y) : filter_node(){ - left = x; - right = y; - } - filter_node(const filter_node&) = delete; - filter_node& operator=(const filter_node&) = delete; - - //! Add concrete_filter to pipeline - virtual base_filter* create_filter() const { - __TBB_ASSERT(false, "method of non-leaf was called"); - return nullptr; - } - - //! Increment reference count - void add_ref() { ref_count.fetch_add(1, std::memory_order_relaxed); } - - //! Decrement reference count and delete if it becomes zero. 
- void remove_ref() { - __TBB_ASSERT(ref_count>0,"ref_count underflow"); - if( ref_count.fetch_sub(1, std::memory_order_relaxed) == 1 ) { - this->~filter_node(); - r1::deallocate_memory(this); - } - } - - virtual ~filter_node() { -#ifdef __TBB_TEST_FILTER_NODE_COUNT - --(__TBB_TEST_FILTER_NODE_COUNT); -#endif - } -}; - -inline filter_node_ptr::filter_node_ptr(filter_node * nd) : my_node(nd) { - if (my_node) { - my_node->add_ref(); - } -} - -inline filter_node_ptr::~filter_node_ptr() { - if (my_node) { - my_node->remove_ref(); - } -} - -inline filter_node_ptr::filter_node_ptr(const filter_node_ptr & rhs) : my_node(rhs.my_node) { - if (my_node) { - my_node->add_ref(); - } -} - -inline filter_node_ptr::filter_node_ptr(filter_node_ptr && rhs) : my_node(rhs.my_node) { - rhs.my_node = nullptr; -} - -inline void filter_node_ptr::operator=(filter_node * rhs) { - // Order of operations below carefully chosen so that reference counts remain correct - // in unlikely event that remove_ref throws exception. - filter_node* old = my_node; - my_node = rhs; - if (my_node) { - my_node->add_ref(); - } - if (old) { - old->remove_ref(); - } -} - -inline void filter_node_ptr::operator=(const filter_node_ptr & rhs) { - *this = rhs.my_node; -} - -inline void filter_node_ptr::operator=(filter_node_ptr && rhs) { - filter_node* old = my_node; - my_node = rhs.my_node; - rhs.my_node = nullptr; - if (old) { - old->remove_ref(); - } -} - -inline filter_node& filter_node_ptr::operator*() const{ - __TBB_ASSERT(my_node,"NULL node is used"); - return *my_node; -} - -inline filter_node_ptr::operator bool() const { - return my_node != nullptr; -} - -//! Node in parse tree representing result of make_filter. -template<typename InputType, typename OutputType, typename Body> -class filter_node_leaf: public filter_node { - const unsigned int my_mode; - const Body my_body; - base_filter* create_filter() const override { - return new(r1::allocate_memory(sizeof(concrete_filter<InputType, OutputType, Body>))) concrete_filter<InputType, OutputType, Body>(my_mode,my_body); - } -public: - filter_node_leaf( unsigned int m, const Body& b ) : my_mode(m), my_body(b) {} -}; - - -template <typename Body, typename Input = typename body_types<decltype(&Body::operator())>::input_type> -using filter_input = typename std::conditional<std::is_same<Input, flow_control>::value, void, Input>::type; - -template <typename Body> -using filter_output = typename body_types<decltype(&Body::operator())>::output_type; - -} // namespace d1 -} // namespace detail -} // namespace tbb - - -#endif /* __TBB_parallel_filters_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_parallel_filters_H +#define __TBB_parallel_filters_H + +#include "_config.h" +#include "_task.h" +#include "_pipeline_filters_deduction.h" +#include "../tbb_allocator.h" + +#include <cstddef> +#include <cstdint> + +namespace tbb { +namespace detail { + +namespace d1 { +class base_filter; +} + +namespace r1 { +void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); +class pipeline; +class stage_task; +class input_buffer; +} + +namespace d1 { +class filter_node; + +//! A stage in a pipeline. +/** @ingroup algorithms */ +class base_filter{ +private: + //! Value used to mark "not in pipeline" + static base_filter* not_in_pipeline() { return reinterpret_cast<base_filter*>(std::intptr_t(-1)); } +public: + //! The lowest bit 0 is for parallel vs serial + static constexpr unsigned int filter_is_serial = 0x1; + + //! 2nd bit distinguishes ordered vs unordered filters. + static constexpr unsigned int filter_is_out_of_order = 0x1<<1; + + //! 3rd bit marks input filters emitting small objects + static constexpr unsigned int filter_may_emit_null = 0x1<<2; + + base_filter(const base_filter&) = delete; + base_filter& operator=(const base_filter&) = delete; + +protected: + explicit base_filter( unsigned int m ) : + next_filter_in_pipeline(not_in_pipeline()), + my_input_buffer(nullptr), + my_filter_mode(m), + my_pipeline(nullptr) + {} + + // signal end-of-input for concrete_filters + void set_end_of_input() { + r1::set_end_of_input(*this); + } + +public: + //! True if filter is serial. + bool is_serial() const { + return bool( my_filter_mode & filter_is_serial ); + } + + //! True if filter must receive stream in order. + bool is_ordered() const { + return (my_filter_mode & filter_is_serial) && !(my_filter_mode & filter_is_out_of_order); + } + + //! true if an input filter can emit null + bool object_may_be_null() { + return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null; + } + + //! Operate on an item from the input stream, and return item for output stream. + /** Returns nullptr if filter is a sink. */ + virtual void* operator()( void* item ) = 0; + + //! Destroy filter. + virtual ~base_filter() {}; + + //! Destroys item if pipeline was cancelled. + /** Required to prevent memory leaks. + Note it can be called concurrently even for serial filters.*/ + virtual void finalize( void* /*item*/ ) {} + +private: + //! Pointer to next filter in the pipeline. + base_filter* next_filter_in_pipeline; + + //! Buffer for incoming tokens, or nullptr if not required. + /** The buffer is required if the filter is serial. */ + r1::input_buffer* my_input_buffer; + + friend class r1::stage_task; + friend class r1::pipeline; + friend void r1::set_end_of_input(d1::base_filter&); + + //! Storage for filter mode and dynamically checked implementation version. + const unsigned int my_filter_mode; + + //! Pointer to the pipeline. + r1::pipeline* my_pipeline; +}; + +template<typename Body, typename InputType, typename OutputType > +class concrete_filter; + +//! input_filter control to signal end-of-input for parallel_pipeline +class flow_control { + bool is_pipeline_stopped = false; + flow_control() = default; + template<typename Body, typename InputType, typename OutputType > friend class concrete_filter; + template<typename Output> friend class input_node; +public: + void stop() { is_pipeline_stopped = true; } +}; + +// Emulate std::is_trivially_copyable (false positives not allowed, false negatives suboptimal but safe). 
+#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT +template<typename T> using tbb_trivially_copyable = std::is_trivially_copyable<T>; +#else +template<typename T> struct tbb_trivially_copyable { enum { value = false }; }; +template<typename T> struct tbb_trivially_copyable < T* > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < bool > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < signed char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < short > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned short > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < int > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned int > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned long > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long long> { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned long long> { enum { value = true }; }; +template<> struct tbb_trivially_copyable < float > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < double > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long double > { enum { value = true }; }; +#endif // __TBB_CPP11_TYPE_PROPERTIES_PRESENT + +template<typename T> +struct use_allocator { + static constexpr bool value = sizeof(T) > sizeof(void *) || !tbb_trivially_copyable<T>::value; +}; + +// A helper class to customize how a type is passed between filters. +// Usage: token_helper<T, use_allocator<T>::value> +template<typename T, bool Allocate> struct token_helper; + +// using tbb_allocator +template<typename T> +struct token_helper<T, true> { + using pointer = T*; + using value_type = T; + static pointer create_token(value_type && source) { + return new (r1::allocate_memory(sizeof(T))) T(std::move(source)); + } + static value_type & token(pointer & t) { return *t; } + static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } + static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } + static void destroy_token(pointer token) { + token->~value_type(); + r1::deallocate_memory(token); + } +}; + +// pointer specialization +template<typename T> +struct token_helper<T*, false> { + using pointer = T*; + using value_type = T*; + static pointer create_token(const value_type & source) { return source; } + static value_type & token(pointer & t) { return t; } + static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } + static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } + static void destroy_token( pointer /*token*/) {} +}; + +// converting type to and from void*, passing objects directly +template<typename T> +struct token_helper<T, false> { + typedef union { + T actual_value; + void * void_overlay; + } type_to_void_ptr_map; + using pointer = T; // not really a pointer in this case. 
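The use_allocator/token_helper split above decides how a token crosses the void* boundary between pipeline stages: a trivially copyable token no larger than void* is packed into the pointer slot itself, anything else is heap-allocated via tbb_allocator. A compile-time restatement of that rule (illustrative types; assumes typical 32/64-bit targets):

    #include <string>
    #include <type_traits>

    // Same predicate as use_allocator<T>, inverted: true when the token travels
    // inside the void* slot rather than through an allocation.
    template <typename T>
    struct passed_in_place {
        static constexpr bool value =
            !(sizeof(T) > sizeof(void*) || !std::is_trivially_copyable<T>::value);
    };

    static_assert(passed_in_place<int>::value,          "small and trivially copyable");
    static_assert(passed_in_place<const char*>::value,  "pointers are forwarded as-is");
    static_assert(!passed_in_place<std::string>::value, "not trivially copyable -> tbb_allocator");

    struct BigToken { double payload[8]; };              // 64 bytes, wider than any void*
    static_assert(!passed_in_place<BigToken>::value,     "larger than void* -> tbb_allocator");

    int main() { return 0; }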
+ using value_type = T; + static pointer create_token(const value_type & source) { return source; } + static value_type & token(pointer & t) { return t; } + static void * cast_to_void_ptr(pointer ref) { + type_to_void_ptr_map mymap; + mymap.void_overlay = nullptr; + mymap.actual_value = ref; + return mymap.void_overlay; + } + static pointer cast_from_void_ptr(void * ref) { + type_to_void_ptr_map mymap; + mymap.void_overlay = ref; + return mymap.actual_value; + } + static void destroy_token( pointer /*token*/) {} +}; + +// intermediate +template<typename InputType, typename OutputType, typename Body> +class concrete_filter: public base_filter { + const Body& my_body; + using input_helper = token_helper<InputType, use_allocator<InputType >::value>; + using input_pointer = typename input_helper::pointer; + using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; + using output_pointer = typename output_helper::pointer; + + void* operator()(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + output_pointer temp_output = output_helper::create_token(my_body(std::move(input_helper::token(temp_input)))); + input_helper::destroy_token(temp_input); + return output_helper::cast_to_void_ptr(temp_output); + } + + void finalize(void * input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + input_helper::destroy_token(temp_input); + } + +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +// input +template<typename OutputType, typename Body> +class concrete_filter<void, OutputType, Body>: public base_filter { + const Body& my_body; + using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; + using output_pointer = typename output_helper::pointer; + + void* operator()(void*) override { + flow_control control; + output_pointer temp_output = output_helper::create_token(my_body(control)); + if(control.is_pipeline_stopped) { + output_helper::destroy_token(temp_output); + set_end_of_input(); + return nullptr; + } + return output_helper::cast_to_void_ptr(temp_output); + } + +public: + concrete_filter(unsigned int m, const Body& body) : + base_filter(m | filter_may_emit_null), + my_body(body) + {} +}; + +// output +template<typename InputType, typename Body> +class concrete_filter<InputType, void, Body>: public base_filter { + const Body& my_body; + using input_helper = token_helper<InputType, use_allocator<InputType >::value>; + using input_pointer = typename input_helper::pointer; + + void* operator()(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + my_body(std::move(input_helper::token(temp_input))); + input_helper::destroy_token(temp_input); + return nullptr; + } + void finalize(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + input_helper::destroy_token(temp_input); + } + +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +template<typename Body> +class concrete_filter<void, void, Body>: public base_filter { + const Body& my_body; + + void* operator()(void*) override { + flow_control control; + my_body(control); + void* output = control.is_pipeline_stopped ? 
nullptr : (void*)(std::intptr_t)-1; + return output; + } +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +class filter_node_ptr { + filter_node * my_node; + +public: + filter_node_ptr() : my_node(nullptr) {} + filter_node_ptr(filter_node *); + ~filter_node_ptr(); + filter_node_ptr(const filter_node_ptr &); + filter_node_ptr(filter_node_ptr &&); + void operator=(filter_node *); + void operator=(const filter_node_ptr &); + void operator=(filter_node_ptr &&); + filter_node& operator*() const; + operator bool() const; +}; + +//! Abstract base class that represents a node in a parse tree underlying a filter class. +/** These nodes are always heap-allocated and can be shared by filter objects. */ +class filter_node { + /** Count must be atomic because it is hidden state for user, but might be shared by threads. */ + std::atomic<std::intptr_t> ref_count; +public: + filter_node_ptr left; + filter_node_ptr right; +protected: + filter_node() : ref_count(0), left(nullptr), right(nullptr) { +#ifdef __TBB_TEST_FILTER_NODE_COUNT + ++(__TBB_TEST_FILTER_NODE_COUNT); +#endif + } +public: + filter_node(const filter_node_ptr& x, const filter_node_ptr& y) : filter_node(){ + left = x; + right = y; + } + filter_node(const filter_node&) = delete; + filter_node& operator=(const filter_node&) = delete; + + //! Add concrete_filter to pipeline + virtual base_filter* create_filter() const { + __TBB_ASSERT(false, "method of non-leaf was called"); + return nullptr; + } + + //! Increment reference count + void add_ref() { ref_count.fetch_add(1, std::memory_order_relaxed); } + + //! Decrement reference count and delete if it becomes zero. + void remove_ref() { + __TBB_ASSERT(ref_count>0,"ref_count underflow"); + if( ref_count.fetch_sub(1, std::memory_order_relaxed) == 1 ) { + this->~filter_node(); + r1::deallocate_memory(this); + } + } + + virtual ~filter_node() { +#ifdef __TBB_TEST_FILTER_NODE_COUNT + --(__TBB_TEST_FILTER_NODE_COUNT); +#endif + } +}; + +inline filter_node_ptr::filter_node_ptr(filter_node * nd) : my_node(nd) { + if (my_node) { + my_node->add_ref(); + } +} + +inline filter_node_ptr::~filter_node_ptr() { + if (my_node) { + my_node->remove_ref(); + } +} + +inline filter_node_ptr::filter_node_ptr(const filter_node_ptr & rhs) : my_node(rhs.my_node) { + if (my_node) { + my_node->add_ref(); + } +} + +inline filter_node_ptr::filter_node_ptr(filter_node_ptr && rhs) : my_node(rhs.my_node) { + rhs.my_node = nullptr; +} + +inline void filter_node_ptr::operator=(filter_node * rhs) { + // Order of operations below carefully chosen so that reference counts remain correct + // in unlikely event that remove_ref throws exception. + filter_node* old = my_node; + my_node = rhs; + if (my_node) { + my_node->add_ref(); + } + if (old) { + old->remove_ref(); + } +} + +inline void filter_node_ptr::operator=(const filter_node_ptr & rhs) { + *this = rhs.my_node; +} + +inline void filter_node_ptr::operator=(filter_node_ptr && rhs) { + filter_node* old = my_node; + my_node = rhs.my_node; + rhs.my_node = nullptr; + if (old) { + old->remove_ref(); + } +} + +inline filter_node& filter_node_ptr::operator*() const{ + __TBB_ASSERT(my_node,"NULL node is used"); + return *my_node; +} + +inline filter_node_ptr::operator bool() const { + return my_node != nullptr; +} + +//! Node in parse tree representing result of make_filter. 
+template<typename InputType, typename OutputType, typename Body> +class filter_node_leaf: public filter_node { + const unsigned int my_mode; + const Body my_body; + base_filter* create_filter() const override { + return new(r1::allocate_memory(sizeof(concrete_filter<InputType, OutputType, Body>))) concrete_filter<InputType, OutputType, Body>(my_mode,my_body); + } +public: + filter_node_leaf( unsigned int m, const Body& b ) : my_mode(m), my_body(b) {} +}; + + +template <typename Body, typename Input = typename body_types<decltype(&Body::operator())>::input_type> +using filter_input = typename std::conditional<std::is_same<Input, flow_control>::value, void, Input>::type; + +template <typename Body> +using filter_output = typename body_types<decltype(&Body::operator())>::output_type; + +} // namespace d1 +} // namespace detail +} // namespace tbb + + +#endif /* __TBB_parallel_filters_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h index 55f94dce00..d6f483c2ea 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h @@ -1,46 +1,46 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__pipeline_filters_deduction_H -#define __TBB__pipeline_filters_deduction_H - -#include "_config.h" -#include <utility> -#include <type_traits> - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Input, typename Output> -struct declare_fitler_types { - using input_type = typename std::remove_const<typename std::remove_reference<Input>::type>::type; - using output_type = typename std::remove_const<typename std::remove_reference<Output>::type>::type; -}; - -template <typename T> struct body_types; - -template <typename T, typename Input, typename Output> -struct body_types<Output(T::*)(Input) const> : declare_fitler_types<Input, Output> {}; - -template <typename T, typename Input, typename Output> -struct body_types<Output(T::*)(Input)> : declare_fitler_types<Input, Output> {}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB__pipeline_filters_deduction_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
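
The filter_input/filter_output aliases above, together with body_types from the deduction header whose diff follows, peel the parameter and return types off Body::operator() and map a flow_control& parameter to void, which is how an input stage deduces to filter<void, T>. A sketch of what gets deduced for two hypothetical functors (Parse and Produce are made-up names; the internal detail::d1 namespace is spelled out only for illustration):

    #include <string>
    #include <type_traits>
    #include "oneapi/tbb/parallel_pipeline.h"   // pulls in these deduction helpers

    struct Parse   { int operator()(const std::string& line) const { return (int)line.size(); } };
    struct Produce { int operator()(tbb::flow_control& fc) const { fc.stop(); return 0; } };

    namespace d1 = tbb::detail::d1;

    static_assert(std::is_same<d1::filter_input<Parse>,  std::string>::value, "const and & are stripped");
    static_assert(std::is_same<d1::filter_output<Parse>, int>::value,         "return type");
    static_assert(std::is_same<d1::filter_input<Produce>, void>::value,
                  "a flow_control& parameter marks an input stage");
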
+*/ + +#ifndef __TBB__pipeline_filters_deduction_H +#define __TBB__pipeline_filters_deduction_H + +#include "_config.h" +#include <utility> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Input, typename Output> +struct declare_fitler_types { + using input_type = typename std::remove_const<typename std::remove_reference<Input>::type>::type; + using output_type = typename std::remove_const<typename std::remove_reference<Output>::type>::type; +}; + +template <typename T> struct body_types; + +template <typename T, typename Input, typename Output> +struct body_types<Output(T::*)(Input) const> : declare_fitler_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output(T::*)(Input)> : declare_fitler_types<Input, Output> {}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB__pipeline_filters_deduction_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h index 36c4ca84ee..2146d127a4 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h @@ -1,76 +1,76 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__range_common_H -#define __TBB_detail__range_common_H - -#include "_config.h" -#include "_utils.h" - -namespace tbb { -namespace detail { -inline namespace d0 { - -//! Dummy type that distinguishes splitting constructor from copy constructor. -/** - * See description of parallel_for and parallel_reduce for example usages. - * @ingroup algorithms - */ -class split {}; - -//! Type enables transmission of splitting proportion from partitioners to range objects -/** - * In order to make use of such facility Range objects must implement - * splitting constructor with this type passed. 
- */ -class proportional_split : no_assign { -public: - proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { } - - size_t left() const { return my_left; } - size_t right() const { return my_right; } - - // used when range does not support proportional split - explicit operator split() const { return split(); } - -private: - size_t my_left, my_right; -}; - -template <typename Range, typename = void> -struct range_split_object_provider { - template <typename PartitionerSplitType> - static split get( PartitionerSplitType& ) { return split(); } -}; - -template <typename Range> -struct range_split_object_provider<Range, - typename std::enable_if<std::is_constructible<Range, Range&, proportional_split&>::value>::type> { - template <typename PartitionerSplitType> - static PartitionerSplitType& get( PartitionerSplitType& split_obj ) { return split_obj; } -}; - -template <typename Range, typename PartitionerSplitType> -auto get_range_split_object( PartitionerSplitType& split_obj ) --> decltype(range_split_object_provider<Range>::get(split_obj)) { - return range_split_object_provider<Range>::get(split_obj); -} - -} // namespace d0 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__range_common_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__range_common_H +#define __TBB_detail__range_common_H + +#include "_config.h" +#include "_utils.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Dummy type that distinguishes splitting constructor from copy constructor. +/** + * See description of parallel_for and parallel_reduce for example usages. + * @ingroup algorithms + */ +class split {}; + +//! Type enables transmission of splitting proportion from partitioners to range objects +/** + * In order to make use of such facility Range objects must implement + * splitting constructor with this type passed. 
+ */ +class proportional_split : no_assign { +public: + proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { } + + size_t left() const { return my_left; } + size_t right() const { return my_right; } + + // used when range does not support proportional split + explicit operator split() const { return split(); } + +private: + size_t my_left, my_right; +}; + +template <typename Range, typename = void> +struct range_split_object_provider { + template <typename PartitionerSplitType> + static split get( PartitionerSplitType& ) { return split(); } +}; + +template <typename Range> +struct range_split_object_provider<Range, + typename std::enable_if<std::is_constructible<Range, Range&, proportional_split&>::value>::type> { + template <typename PartitionerSplitType> + static PartitionerSplitType& get( PartitionerSplitType& split_obj ) { return split_obj; } +}; + +template <typename Range, typename PartitionerSplitType> +auto get_range_split_object( PartitionerSplitType& split_obj ) +-> decltype(range_split_object_provider<Range>::get(split_obj)) { + return range_split_object_provider<Range>::get(split_obj); +} + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__range_common_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h index 28ef9f042e..97b077993d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h @@ -1,162 +1,162 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__rtm_mutex_impl_H -#define __TBB__rtm_mutex_impl_H - -#include "_assert.h" -#include "_utils.h" -#include "../spin_mutex.h" - -#include "../profiling.h" - -namespace tbb { -namespace detail { -namespace r1 { -struct rtm_mutex_impl; -} -namespace d1 { - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Suppress warning: structure was padded due to alignment specifier - #pragma warning (push) - #pragma warning (disable: 4324) -#endif - -/** A rtm_mutex is an speculation-enabled spin mutex. - It should be used for locking short critical sections where the lock is - contended but the data it protects are not. If zero-initialized, the - mutex is considered unheld. - @ingroup synchronization */ -class alignas(max_nfs_size) rtm_mutex : private spin_mutex { -private: - enum class rtm_state { - rtm_none, - rtm_transacting, - rtm_real - }; -public: - //! Constructors - rtm_mutex() noexcept { - create_itt_sync(this, "tbb::speculative_spin_mutex", ""); - } - - //! Destructor - ~rtm_mutex() = default; - - //! Represents acquisition of a mutex. - class scoped_lock { - public: - friend class rtm_mutex; - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) {} - - //! Acquire lock on given mutex. 
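
proportional_split, shown in both the removed and restored copies of _range_common.h above, is how a partitioner asks a Range to split unevenly: a Range opts in by providing a constructor taking (Range&, proportional_split&), and get_range_split_object silently falls back to a plain split when that constructor is absent. A hedged sketch of a user-defined range honouring the proportion (MyRange is an illustrative name; the example assumes split and proportional_split are re-exported into namespace tbb via blocked_range.h, as in current oneTBB releases):

    #include <cstddef>
    #include "oneapi/tbb/blocked_range.h"   // assumed to re-export tbb::split and tbb::proportional_split

    // Illustrative 1-D index range with both splitting constructors.
    class MyRange {
        std::size_t my_begin, my_end;
    public:
        MyRange(std::size_t b, std::size_t e) : my_begin(b), my_end(e) {}

        // Required Range interface.
        bool empty() const { return my_begin >= my_end; }
        bool is_divisible() const { return my_end - my_begin > 1; }

        // Even split: the new object takes the right half, r keeps the left half.
        MyRange(MyRange& r, tbb::split)
            : my_begin((r.my_begin + r.my_end) / 2), my_end(r.my_end) {
            r.my_end = my_begin;
        }

        // Proportional split: r keeps left() parts, the new object gets right() parts.
        MyRange(MyRange& r, tbb::proportional_split& p) : my_end(r.my_end) {
            std::size_t n = r.my_end - r.my_begin;
            std::size_t left_size = n * p.left() / (p.left() + p.right());
            if (left_size == 0) left_size = 1;   // keep both parts non-empty
            my_begin = r.my_begin + left_size;
            r.my_end = my_begin;
        }
    };
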
- scoped_lock(rtm_mutex& m) : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) { - acquire(m); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if(m_transaction_state != rtm_state::rtm_none) { - release(); - } - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - void acquire(rtm_mutex& m); - - //! Try acquire lock on given mutex. - bool try_acquire(rtm_mutex& m); - - //! Release lock - void release(); - - private: - rtm_mutex* m_mutex; - rtm_state m_transaction_state; - friend r1::rtm_mutex_impl; - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = false; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = false; -private: - friend r1::rtm_mutex_impl; -}; // end of rtm_mutex -} // namespace d1 - -namespace r1 { - //! Internal acquire lock. - // only_speculate == true if we're doing a try_lock, else false. - void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false); - //! Internal try_acquire lock. - bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&); - //! Internal release lock. - void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&); -} // namespace r1 - -namespace d1 { -//! Acquire lock on given mutex. -inline void rtm_mutex::scoped_lock::acquire(rtm_mutex& m) { - __TBB_ASSERT(!m_mutex, "lock is already acquired"); - r1::acquire(m, *this); -} - -//! Try acquire lock on given mutex. -inline bool rtm_mutex::scoped_lock::try_acquire(rtm_mutex& m) { - __TBB_ASSERT(!m_mutex, "lock is already acquired"); - return r1::try_acquire(m, *this); -} - -//! Release lock -inline void rtm_mutex::scoped_lock::release() { - __TBB_ASSERT(m_mutex, "lock is not acquired"); - __TBB_ASSERT(m_transaction_state != rtm_state::rtm_none, "lock is not acquired"); - return r1::release(*this); -} - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning (pop) // 4324 warning -#endif - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(rtm_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(rtm_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif // WIN -#else -inline void set_name(rtm_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(rtm_mutex&, const wchar_t*) {} -#endif // WIN -#endif - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif /* __TBB__rtm_mutex_impl_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB__rtm_mutex_impl_H +#define __TBB__rtm_mutex_impl_H + +#include "_assert.h" +#include "_utils.h" +#include "../spin_mutex.h" + +#include "../profiling.h" + +namespace tbb { +namespace detail { +namespace r1 { +struct rtm_mutex_impl; +} +namespace d1 { + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning (push) + #pragma warning (disable: 4324) +#endif + +/** A rtm_mutex is an speculation-enabled spin mutex. + It should be used for locking short critical sections where the lock is + contended but the data it protects are not. If zero-initialized, the + mutex is considered unheld. + @ingroup synchronization */ +class alignas(max_nfs_size) rtm_mutex : private spin_mutex { +private: + enum class rtm_state { + rtm_none, + rtm_transacting, + rtm_real + }; +public: + //! Constructors + rtm_mutex() noexcept { + create_itt_sync(this, "tbb::speculative_spin_mutex", ""); + } + + //! Destructor + ~rtm_mutex() = default; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + friend class rtm_mutex; + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) {} + + //! Acquire lock on given mutex. + scoped_lock(rtm_mutex& m) : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) { + acquire(m); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if(m_transaction_state != rtm_state::rtm_none) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire(rtm_mutex& m); + + //! Try acquire lock on given mutex. + bool try_acquire(rtm_mutex& m); + + //! Release lock + void release(); + + private: + rtm_mutex* m_mutex; + rtm_state m_transaction_state; + friend r1::rtm_mutex_impl; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; +private: + friend r1::rtm_mutex_impl; +}; // end of rtm_mutex +} // namespace d1 + +namespace r1 { + //! Internal acquire lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false); + //! Internal try_acquire lock. + bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&); + //! Internal release lock. + void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&); +} // namespace r1 + +namespace d1 { +//! Acquire lock on given mutex. +inline void rtm_mutex::scoped_lock::acquire(rtm_mutex& m) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + r1::acquire(m, *this); +} + +//! Try acquire lock on given mutex. +inline bool rtm_mutex::scoped_lock::try_acquire(rtm_mutex& m) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + return r1::try_acquire(m, *this); +} + +//! 
Release lock +inline void rtm_mutex::scoped_lock::release() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + __TBB_ASSERT(m_transaction_state != rtm_state::rtm_none, "lock is not acquired"); + return r1::release(*this); +} + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) // 4324 warning +#endif + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(rtm_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(rtm_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_mutex&, const wchar_t*) {} +#endif // WIN +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__rtm_mutex_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h index b62e86bd0a..0cf64b2dba 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h @@ -1,209 +1,209 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__rtm_rw_mutex_H -#define __TBB_detail__rtm_rw_mutex_H - -#include "_assert.h" -#include "_utils.h" -#include "../spin_rw_mutex.h" - -#include <atomic> - -namespace tbb { -namespace detail { - -namespace r1 { -struct rtm_rw_mutex_impl; -} - -namespace d1 { - -constexpr std::size_t speculation_granularity = 64; -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Suppress warning: structure was padded due to alignment specifier - #pragma warning (push) - #pragma warning (disable: 4324) -#endif - -//! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and writer-preference -/** @ingroup synchronization */ -class alignas(max_nfs_size) rtm_rw_mutex : private spin_rw_mutex { - friend struct r1::rtm_rw_mutex_impl; -private: - enum class rtm_type { - rtm_not_in_mutex, - rtm_transacting_reader, - rtm_transacting_writer, - rtm_real_reader, - rtm_real_writer - }; -public: - //! Constructors - rtm_rw_mutex() noexcept : write_flag(false) { - create_itt_sync(this, "tbb::speculative_spin_rw_mutex", ""); - } - - //! Destructor - ~rtm_rw_mutex() = default; - - //! Represents acquisition of a mutex. - class scoped_lock { - friend struct r1::rtm_rw_mutex_impl; - public: - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) {} - - //! Acquire lock on given mutex. - scoped_lock(rtm_rw_mutex& m, bool write = true) : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) { - acquire(m, write); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if(m_transaction_state != rtm_type::rtm_not_in_mutex) { - release(); - } - } - - //! 
No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - inline void acquire(rtm_rw_mutex& m, bool write = true); - - //! Try acquire lock on given mutex. - inline bool try_acquire(rtm_rw_mutex& m, bool write = true); - - //! Release lock - inline void release(); - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - inline bool upgrade_to_writer(); - - //! Downgrade writer to become a reader. - inline bool downgrade_to_reader(); - - private: - rtm_rw_mutex* m_mutex; - rtm_type m_transaction_state; - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = true; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = false; - -private: - alignas(speculation_granularity) std::atomic<bool> write_flag; -}; - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning (pop) // 4324 warning -#endif - -} // namespace d1 - -namespace r1 { - //! Internal acquire write lock. - // only_speculate == true if we're doing a try_lock, else false. - void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); - //! Internal acquire read lock. - // only_speculate == true if we're doing a try_lock, else false. - void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); - //! Internal upgrade reader to become a writer. - bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&); - //! Internal downgrade writer to become a reader. - bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&); - //! Internal try_acquire write lock. - bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); - //! Internal try_acquire read lock. - bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); - //! Internal release lock. - void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&); -} - -namespace d1 { -//! Acquire lock on given mutex. -void rtm_rw_mutex::scoped_lock::acquire(rtm_rw_mutex& m, bool write) { - __TBB_ASSERT(!m_mutex, "lock is already acquired"); - if (write) { - r1::acquire_writer(m, *this); - } else { - r1::acquire_reader(m, *this); - } -} - -//! Try acquire lock on given mutex. -bool rtm_rw_mutex::scoped_lock::try_acquire(rtm_rw_mutex& m, bool write) { - __TBB_ASSERT(!m_mutex, "lock is already acquired"); - if (write) { - return r1::try_acquire_writer(m, *this); - } else { - return r1::try_acquire_reader(m, *this); - } -} - -//! Release lock -void rtm_rw_mutex::scoped_lock::release() { - __TBB_ASSERT(m_mutex, "lock is not acquired"); - __TBB_ASSERT(m_transaction_state != rtm_type::rtm_not_in_mutex, "lock is not acquired"); - return r1::release(*this); -} - -//! Upgrade reader to become a writer. -/** Returns whether the upgrade happened without releasing and re-acquiring the lock */ -bool rtm_rw_mutex::scoped_lock::upgrade_to_writer() { - __TBB_ASSERT(m_mutex, "lock is not acquired"); - if (m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer) { - return true; // Already a writer - } - return r1::upgrade(*this); -} - -//! Downgrade writer to become a reader. 
-bool rtm_rw_mutex::scoped_lock::downgrade_to_reader() { - __TBB_ASSERT(m_mutex, "lock is not acquired"); - if (m_transaction_state == rtm_type::rtm_transacting_reader || m_transaction_state == rtm_type::rtm_real_reader) { - return true; // Already a reader - } - return r1::downgrade(*this); -} - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(rtm_rw_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(rtm_rw_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif // WIN -#else -inline void set_name(rtm_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(rtm_rw_mutex&, const wchar_t*) {} -#endif // WIN -#endif - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__rtm_rw_mutex_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__rtm_rw_mutex_H +#define __TBB_detail__rtm_rw_mutex_H + +#include "_assert.h" +#include "_utils.h" +#include "../spin_rw_mutex.h" + +#include <atomic> + +namespace tbb { +namespace detail { + +namespace r1 { +struct rtm_rw_mutex_impl; +} + +namespace d1 { + +constexpr std::size_t speculation_granularity = 64; +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning (push) + #pragma warning (disable: 4324) +#endif + +//! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and writer-preference +/** @ingroup synchronization */ +class alignas(max_nfs_size) rtm_rw_mutex : private spin_rw_mutex { + friend struct r1::rtm_rw_mutex_impl; +private: + enum class rtm_type { + rtm_not_in_mutex, + rtm_transacting_reader, + rtm_transacting_writer, + rtm_real_reader, + rtm_real_writer + }; +public: + //! Constructors + rtm_rw_mutex() noexcept : write_flag(false) { + create_itt_sync(this, "tbb::speculative_spin_rw_mutex", ""); + } + + //! Destructor + ~rtm_rw_mutex() = default; + + //! Represents acquisition of a mutex. + class scoped_lock { + friend struct r1::rtm_rw_mutex_impl; + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) {} + + //! Acquire lock on given mutex. + scoped_lock(rtm_rw_mutex& m, bool write = true) : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) { + acquire(m, write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if(m_transaction_state != rtm_type::rtm_not_in_mutex) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + inline void acquire(rtm_rw_mutex& m, bool write = true); + + //! Try acquire lock on given mutex. + inline bool try_acquire(rtm_rw_mutex& m, bool write = true); + + //! 
Release lock + inline void release(); + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + inline bool upgrade_to_writer(); + + //! Downgrade writer to become a reader. + inline bool downgrade_to_reader(); + + private: + rtm_rw_mutex* m_mutex; + rtm_type m_transaction_state; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + +private: + alignas(speculation_granularity) std::atomic<bool> write_flag; +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) // 4324 warning +#endif + +} // namespace d1 + +namespace r1 { + //! Internal acquire write lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); + //! Internal acquire read lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); + //! Internal upgrade reader to become a writer. + bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&); + //! Internal downgrade writer to become a reader. + bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&); + //! Internal try_acquire write lock. + bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); + //! Internal try_acquire read lock. + bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); + //! Internal release lock. + void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&); +} + +namespace d1 { +//! Acquire lock on given mutex. +void rtm_rw_mutex::scoped_lock::acquire(rtm_rw_mutex& m, bool write) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + if (write) { + r1::acquire_writer(m, *this); + } else { + r1::acquire_reader(m, *this); + } +} + +//! Try acquire lock on given mutex. +bool rtm_rw_mutex::scoped_lock::try_acquire(rtm_rw_mutex& m, bool write) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + if (write) { + return r1::try_acquire_writer(m, *this); + } else { + return r1::try_acquire_reader(m, *this); + } +} + +//! Release lock +void rtm_rw_mutex::scoped_lock::release() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + __TBB_ASSERT(m_transaction_state != rtm_type::rtm_not_in_mutex, "lock is not acquired"); + return r1::release(*this); +} + +//! Upgrade reader to become a writer. +/** Returns whether the upgrade happened without releasing and re-acquiring the lock */ +bool rtm_rw_mutex::scoped_lock::upgrade_to_writer() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + if (m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer) { + return true; // Already a writer + } + return r1::upgrade(*this); +} + +//! Downgrade writer to become a reader. 
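
rtm_rw_mutex::scoped_lock above follows the same reader-writer protocol as the public mutexes (the class privately derives from spin_rw_mutex), so the acquire/upgrade dance can be illustrated with tbb::spin_rw_mutex, whose scoped_lock exposes the same interface. The cache in this sketch is an illustrative stand-in, not anything from the library:

    #include "oneapi/tbb/spin_rw_mutex.h"
    #include <map>
    #include <string>

    tbb::spin_rw_mutex cache_mutex;
    std::map<std::string, int> cache;   // guarded by cache_mutex

    // Read-mostly lookup that upgrades to a writer only when it has to insert.
    int lookup_or_insert(const std::string& key, int fallback) {
        tbb::spin_rw_mutex::scoped_lock lock(cache_mutex, /*write=*/false);   // reader
        auto it = cache.find(key);
        if (it != cache.end())
            return it->second;

        // upgrade_to_writer() returns false if the lock had to be released and
        // re-acquired; another thread may have inserted the key in that window.
        if (!lock.upgrade_to_writer()) {
            it = cache.find(key);
            if (it != cache.end())
                return it->second;
        }
        cache[key] = fallback;
        return fallback;
    }   // scoped_lock releases the mutex here
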
+bool rtm_rw_mutex::scoped_lock::downgrade_to_reader() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + if (m_transaction_state == rtm_type::rtm_transacting_reader || m_transaction_state == rtm_type::rtm_real_reader) { + return true; // Already a reader + } + return r1::downgrade(*this); +} + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(rtm_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(rtm_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_rw_mutex&, const wchar_t*) {} +#endif // WIN +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__rtm_rw_mutex_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h index 480ec8135e..a676203137 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h @@ -1,563 +1,563 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__segment_table_H -#define __TBB_detail__segment_table_H - -#include "_config.h" -#include "_allocator_traits.h" -#include "_template_helpers.h" -#include "_utils.h" -#include "_assert.h" -#include "_exception.h" -#include <atomic> -#include <type_traits> -#include <memory> -#include <cstring> - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(push) -#pragma warning(disable: 4127) // warning C4127: conditional expression is constant -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename T, typename Allocator, typename DerivedType, std::size_t PointersPerEmbeddedTable> -class segment_table { -public: - using value_type = T; - using segment_type = T*; - using atomic_segment = std::atomic<segment_type>; - using segment_table_type = atomic_segment*; - - using size_type = std::size_t; - using segment_index_type = std::size_t; - - using allocator_type = Allocator; - - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - using segment_table_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_segment>; -protected: - using segment_table_allocator_traits = tbb::detail::allocator_traits<segment_table_allocator_type>; - using derived_type = DerivedType; - - static constexpr size_type pointers_per_embedded_table = PointersPerEmbeddedTable; - static constexpr size_type pointers_per_long_table = sizeof(size_type) * 8; -public: - segment_table( const allocator_type& alloc = allocator_type() ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) - , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - } - - segment_table( const 
segment_table& other ) - : my_segment_table_allocator(segment_table_allocator_traits:: - select_on_container_copy_construction(other.my_segment_table_allocator)) - , my_segment_table(my_embedded_table), my_first_block{}, my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - try_call( [&] { - internal_transfer(other, copy_segment_body_type{*this}); - } ).on_exception( [&] { - clear(); - }); - } - - segment_table( const segment_table& other, const allocator_type& alloc ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) - , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - try_call( [&] { - internal_transfer(other, copy_segment_body_type{*this}); - } ).on_exception( [&] { - clear(); - }); - } - - segment_table( segment_table&& other ) - : my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(my_embedded_table) - , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - internal_move(std::move(other)); - } - - segment_table( segment_table&& other, const allocator_type& alloc ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table), my_first_block{} - , my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - using is_equal_type = typename segment_table_allocator_traits::is_always_equal; - internal_move_construct_with_allocator(std::move(other), alloc, is_equal_type()); - } - - ~segment_table() { - clear(); - } - - segment_table& operator=( const segment_table& other ) { - if (this != &other) { - copy_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); - internal_transfer(other, copy_segment_body_type{*this}); - } - return *this; - } - - segment_table& operator=( segment_table&& other ) - noexcept(derived_type::is_noexcept_assignment) - { - using pocma_type = typename segment_table_allocator_traits::propagate_on_container_move_assignment; - using is_equal_type = typename segment_table_allocator_traits::is_always_equal; - - if (this != &other) { - move_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); - internal_move_assign(std::move(other), tbb::detail::disjunction<is_equal_type, pocma_type>()); - } - return *this; - } - - void swap( segment_table& other ) - noexcept(derived_type::is_noexcept_swap) - { - using is_equal_type = typename segment_table_allocator_traits::is_always_equal; - using pocs_type = typename segment_table_allocator_traits::propagate_on_container_swap; - - if (this != &other) { - swap_allocators(my_segment_table_allocator, other.my_segment_table_allocator); - internal_swap(other, tbb::detail::disjunction<is_equal_type, pocs_type>()); - } - } - - segment_type get_segment( segment_index_type index ) const { - return get_table()[index] + segment_base(index); - } - - value_type& operator[]( size_type index ) { - return internal_subscript<true>(index); - } - - const value_type& operator[]( size_type index ) const { - return const_cast<segment_table*>(this)->internal_subscript<true>(index); - } - - const segment_table_allocator_type& get_allocator() const { - return my_segment_table_allocator; - } - - segment_table_allocator_type& get_allocator() { - return my_segment_table_allocator; - } - - void enable_segment( segment_type& segment, segment_table_type table, 
segment_index_type seg_index, size_type index ) { - // Allocate new segment - segment_type new_segment = self()->create_segment(table, seg_index, index); - if (new_segment != nullptr) { - // Store (new_segment - segment_base) into the segment table to allow access to the table by index via - // my_segment_table[segment_index_of(index)][index] - segment_type disabled_segment = nullptr; - if (!table[seg_index].compare_exchange_strong(disabled_segment, new_segment - segment_base(seg_index))) { - // compare_exchange failed => some other thread has already enabled this segment - // Deallocate the memory - self()->deallocate_segment(new_segment, seg_index); - } - } - - segment = table[seg_index].load(std::memory_order_acquire); - __TBB_ASSERT(segment != nullptr, "If create_segment returned nullptr, the element should be stored in the table"); - } - - void delete_segment( segment_index_type seg_index ) { - segment_type disabled_segment = nullptr; - // Set the pointer to the segment to NULL in the table - segment_type segment_to_delete = get_table()[seg_index].exchange(disabled_segment); - if (segment_to_delete == segment_allocation_failure_tag) { - return; - } - - segment_to_delete += segment_base(seg_index); - - // Deallocate the segment - self()->destroy_segment(segment_to_delete, seg_index); - } - - size_type number_of_segments( segment_table_type table ) const { - // Check for an active table, if it is embedded table - return the number of embedded segments - // Otherwise - return the maximum number of segments - return table == my_embedded_table ? pointers_per_embedded_table : pointers_per_long_table; - } - - size_type capacity() const noexcept { - segment_table_type table = get_table(); - size_type num_segments = number_of_segments(table); - for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { - // Check if the pointer is valid (allocated) - if (table[seg_index].load(std::memory_order_relaxed) <= segment_allocation_failure_tag) { - return segment_base(seg_index); - } - } - return segment_base(num_segments); - } - - size_type find_last_allocated_segment( segment_table_type table ) const noexcept { - size_type end = 0; - size_type num_segments = number_of_segments(table); - for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { - // Check if the pointer is valid (allocated) - if (table[seg_index].load(std::memory_order_relaxed) > segment_allocation_failure_tag) { - end = seg_index + 1; - } - } - return end; - } - - void reserve( size_type n ) { - if (n > allocator_traits_type::max_size(my_segment_table_allocator)) { - throw_exception(exception_id::reservation_length_error); - } - - size_type size = my_size.load(std::memory_order_relaxed); - segment_index_type start_seg_idx = size == 0 ? 
0 : segment_index_of(size - 1) + 1; - for (segment_index_type seg_idx = start_seg_idx; segment_base(seg_idx) < n; ++seg_idx) { - size_type first_index = segment_base(seg_idx); - internal_subscript<true>(first_index); - } - } - - void clear() { - clear_segments(); - clear_table(); - my_size.store(0, std::memory_order_relaxed); - my_first_block.store(0, std::memory_order_relaxed); - } - - void clear_segments() { - segment_table_type current_segment_table = get_table(); - for (size_type i = number_of_segments(current_segment_table); i != 0; --i) { - if (current_segment_table[i - 1].load(std::memory_order_relaxed) != nullptr) { - // If the segment was enabled - disable and deallocate it - delete_segment(i - 1); - } - } - } - - void clear_table() { - segment_table_type current_segment_table = get_table(); - if (current_segment_table != my_embedded_table) { - // If the active table is not the embedded one - deallocate the active table - for (size_type i = 0; i != pointers_per_long_table; ++i) { - segment_table_allocator_traits::destroy(my_segment_table_allocator, ¤t_segment_table[i]); - } - - segment_table_allocator_traits::deallocate(my_segment_table_allocator, current_segment_table, pointers_per_long_table); - my_segment_table.store(my_embedded_table, std::memory_order_relaxed); - zero_table(my_embedded_table, pointers_per_embedded_table); - } - } - - void extend_table_if_necessary(segment_table_type& table, size_type start_index, size_type end_index) { - // extend_segment_table if an active table is an embedded table - // and the requested index is not in the embedded table - if (table == my_embedded_table && end_index > embedded_table_size) { - if (start_index <= embedded_table_size) { - try_call([&] { - table = self()->allocate_long_table(my_embedded_table, start_index); - // It is possible that the table was extended by the thread that allocated first_block. - // In this case it is necessary to re-read the current table. - - if (table) { - my_segment_table.store(table, std::memory_order_release); - } else { - table = my_segment_table.load(std::memory_order_acquire); - } - }).on_exception([&] { - my_segment_table_allocation_failed.store(true, std::memory_order_relaxed); - }); - } else { - atomic_backoff backoff; - do { - if (my_segment_table_allocation_failed.load(std::memory_order_relaxed)) { - throw_exception(exception_id::bad_alloc); - } - backoff.pause(); - table = my_segment_table.load(std::memory_order_acquire); - } while (table == my_embedded_table); - } - } - } - - // Return the segment where index is stored - static constexpr segment_index_type segment_index_of( size_type index ) { - return size_type(tbb::detail::log2(uintptr_t(index|1))); - } - - // Needed to calculate the offset in segment - static constexpr size_type segment_base( size_type index ) { - return size_type(1) << index & ~size_type(1); - } - - // Return size of the segment - static constexpr size_type segment_size( size_type index ) { - return index == 0 ? 
2 : size_type(1) << index; - } - -private: - - derived_type* self() { - return static_cast<derived_type*>(this); - } - - struct copy_segment_body_type { - void operator()( segment_index_type index, segment_type from, segment_type to ) const { - my_instance.self()->copy_segment(index, from, to); - } - segment_table& my_instance; - }; - - struct move_segment_body_type { - void operator()( segment_index_type index, segment_type from, segment_type to ) const { - my_instance.self()->move_segment(index, from, to); - } - segment_table& my_instance; - }; - - // Transgers all segments from the other table - template <typename TransferBody> - void internal_transfer( const segment_table& other, TransferBody transfer_segment ) { - static_cast<derived_type*>(this)->destroy_elements(); - - assign_first_block_if_necessary(other.my_first_block.load(std::memory_order_relaxed)); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - - segment_table_type other_table = other.get_table(); - size_type end_segment_size = segment_size(other.find_last_allocated_segment(other_table)); - - // If an exception occurred in other, then the size may be greater than the size of the end segment. - size_type other_size = end_segment_size < other.my_size.load(std::memory_order_relaxed) ? - other.my_size.load(std::memory_order_relaxed) : end_segment_size; - other_size = my_segment_table_allocation_failed ? embedded_table_size : other_size; - - for (segment_index_type i = 0; segment_base(i) < other_size; ++i) { - // If the segment in other table is enabled - transfer it - if (other_table[i].load(std::memory_order_relaxed) == segment_allocation_failure_tag) - { - my_size = segment_base(i); - break; - } else if (other_table[i].load(std::memory_order_relaxed) != nullptr) { - internal_subscript<true>(segment_base(i)); - transfer_segment(i, other.get_table()[i].load(std::memory_order_relaxed) + segment_base(i), - get_table()[i].load(std::memory_order_relaxed) + segment_base(i)); - } - } - } - - // Moves the other segment table - // Only equal allocators are allowed - void internal_move( segment_table&& other ) { - // NOTE: allocators should be equal - clear(); - my_first_block.store(other.my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - // If an active table in other is embedded - restore all of the embedded segments - if (other.get_table() == other.my_embedded_table) { - for ( size_type i = 0; i != pointers_per_embedded_table; ++i ) { - segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); - my_embedded_table[i].store(other_segment, std::memory_order_relaxed); - other.my_embedded_table[i].store(nullptr, std::memory_order_relaxed); - } - my_segment_table.store(my_embedded_table, std::memory_order_relaxed); - } else { - my_segment_table.store(other.my_segment_table, std::memory_order_relaxed); - other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); - zero_table(other.my_embedded_table, pointers_per_embedded_table); - } - other.my_size.store(0, std::memory_order_relaxed); - } - - // Move construct the segment table with the allocator object - // if any instances of allocator_type are always equal - void internal_move_construct_with_allocator( segment_table&& other, const allocator_type&, - /*is_always_equal = */ std::true_type ) { - internal_move(std::move(other)); - } - - // Move construct the segment table with the 
allocator object - // if any instances of allocator_type are always equal - void internal_move_construct_with_allocator( segment_table&& other, const allocator_type& alloc, - /*is_always_equal = */ std::false_type ) { - if (other.my_segment_table_allocator == alloc) { - // If allocators are equal - restore pointers - internal_move(std::move(other)); - } else { - // If allocators are not equal - perform per element move with reallocation - try_call( [&] { - internal_transfer(other, move_segment_body_type{*this}); - } ).on_exception( [&] { - clear(); - }); - } - } - - // Move assigns the segment table to other is any instances of allocator_type are always equal - // or propagate_on_container_move_assignment is true - void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::true_type ) { - internal_move(std::move(other)); - } - - // Move assigns the segment table to other is any instances of allocator_type are not always equal - // and propagate_on_container_move_assignment is false - void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::false_type ) { - if (my_segment_table_allocator == other.my_segment_table_allocator) { - // If allocators are equal - restore pointers - internal_move(std::move(other)); - } else { - // If allocators are not equal - perform per element move with reallocation - internal_transfer(other, move_segment_body_type{*this}); - } - } - - // Swaps two segment tables if any instances of allocator_type are always equal - // or propagate_on_container_swap is true - void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::true_type ) { - internal_swap_fields(other); - } - - // Swaps two segment tables if any instances of allocator_type are not always equal - // and propagate_on_container_swap is false - // According to the C++ standard, swapping of two containers with unequal allocators - // is an undefined behavior scenario - void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::false_type ) { - __TBB_ASSERT(my_segment_table_allocator == other.my_segment_table_allocator, - "Swapping with unequal allocators is not allowed"); - internal_swap_fields(other); - } - - void internal_swap_fields( segment_table& other ) { - // If an active table in either *this segment table or other is an embedded one - swaps the embedded tables - if (get_table() == my_embedded_table || - other.get_table() == other.my_embedded_table) { - - for (size_type i = 0; i != pointers_per_embedded_table; ++i) { - segment_type current_segment = my_embedded_table[i].load(std::memory_order_relaxed); - segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); - - my_embedded_table[i].store(other_segment, std::memory_order_relaxed); - other.my_embedded_table[i].store(current_segment, std::memory_order_relaxed); - } - } - - segment_table_type current_segment_table = get_table(); - segment_table_type other_segment_table = other.get_table(); - - // If an active table is an embedded one - - // store an active table in other to the embedded one from other - if (current_segment_table == my_embedded_table) { - other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); - } else { - // Otherwise - store it to the active segment table - other.my_segment_table.store(current_segment_table, std::memory_order_relaxed); - } - - // If an active table in other segment table is an embedded one - - // store an active table in other to the embedded one from *this - if 
(other_segment_table == other.my_embedded_table) { - my_segment_table.store(my_embedded_table, std::memory_order_relaxed); - } else { - // Otherwise - store it to the active segment table in other - my_segment_table.store(other_segment_table, std::memory_order_relaxed); - } - auto first_block = other.my_first_block.load(std::memory_order_relaxed); - other.my_first_block.store(my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_first_block.store(first_block, std::memory_order_relaxed); - - auto size = other.my_size.load(std::memory_order_relaxed); - other.my_size.store(my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_size.store(size, std::memory_order_relaxed); - } - -protected: - // A flag indicates that an exception was throws during segment allocations - const segment_type segment_allocation_failure_tag = reinterpret_cast<segment_type>(1); - static constexpr size_type embedded_table_size = segment_size(pointers_per_embedded_table); - - template <bool allow_out_of_range_access> - value_type& internal_subscript( size_type index ) { - segment_index_type seg_index = segment_index_of(index); - segment_table_type table = my_segment_table.load(std::memory_order_acquire); - segment_type segment = nullptr; - - if (allow_out_of_range_access) { - if (derived_type::allow_table_extending) { - extend_table_if_necessary(table, index, index + 1); - } - - segment = table[seg_index].load(std::memory_order_acquire); - // If the required segment is disabled - enable it - if (segment == nullptr) { - enable_segment(segment, table, seg_index, index); - } - // Check if an exception was thrown during segment allocation - if (segment == segment_allocation_failure_tag) { - throw_exception(exception_id::bad_alloc); - } - } else { - segment = table[seg_index].load(std::memory_order_acquire); - } - __TBB_ASSERT(segment != nullptr, nullptr); - - return segment[index]; - } - - void assign_first_block_if_necessary(segment_index_type index) { - size_type zero = 0; - if (this->my_first_block.load(std::memory_order_relaxed) == zero) { - this->my_first_block.compare_exchange_strong(zero, index); - } - } - - void zero_table( segment_table_type table, size_type count ) { - for (size_type i = 0; i != count; ++i) { - table[i].store(nullptr, std::memory_order_relaxed); - } - } - - segment_table_type get_table() const { - return my_segment_table.load(std::memory_order_acquire); - } - - segment_table_allocator_type my_segment_table_allocator; - std::atomic<segment_table_type> my_segment_table; - atomic_segment my_embedded_table[pointers_per_embedded_table]; - // Number of segments in first block - std::atomic<size_type> my_first_block; - // Number of elements in table - std::atomic<size_type> my_size; - // Flag to indicate failed extend table - std::atomic<bool> my_segment_table_allocation_failed; -}; // class segment_table - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(pop) // warning 4127 is back -#endif - -#endif // __TBB_detail__segment_table_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__segment_table_H +#define __TBB_detail__segment_table_H + +#include "_config.h" +#include "_allocator_traits.h" +#include "_template_helpers.h" +#include "_utils.h" +#include "_assert.h" +#include "_exception.h" +#include <atomic> +#include <type_traits> +#include <memory> +#include <cstring> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename T, typename Allocator, typename DerivedType, std::size_t PointersPerEmbeddedTable> +class segment_table { +public: + using value_type = T; + using segment_type = T*; + using atomic_segment = std::atomic<segment_type>; + using segment_table_type = atomic_segment*; + + using size_type = std::size_t; + using segment_index_type = std::size_t; + + using allocator_type = Allocator; + + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using segment_table_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_segment>; +protected: + using segment_table_allocator_traits = tbb::detail::allocator_traits<segment_table_allocator_type>; + using derived_type = DerivedType; + + static constexpr size_type pointers_per_embedded_table = PointersPerEmbeddedTable; + static constexpr size_type pointers_per_long_table = sizeof(size_type) * 8; +public: + segment_table( const allocator_type& alloc = allocator_type() ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + } + + segment_table( const segment_table& other ) + : my_segment_table_allocator(segment_table_allocator_traits:: + select_on_container_copy_construction(other.my_segment_table_allocator)) + , my_segment_table(my_embedded_table), my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + try_call( [&] { + internal_transfer(other, copy_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + + segment_table( const segment_table& other, const allocator_type& alloc ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + try_call( [&] { + internal_transfer(other, copy_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + + segment_table( segment_table&& other ) + : my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + internal_move(std::move(other)); + } + + segment_table( segment_table&& other, const allocator_type& alloc ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table), my_first_block{} + , 
my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + internal_move_construct_with_allocator(std::move(other), alloc, is_equal_type()); + } + + ~segment_table() { + clear(); + } + + segment_table& operator=( const segment_table& other ) { + if (this != &other) { + copy_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_transfer(other, copy_segment_body_type{*this}); + } + return *this; + } + + segment_table& operator=( segment_table&& other ) + noexcept(derived_type::is_noexcept_assignment) + { + using pocma_type = typename segment_table_allocator_traits::propagate_on_container_move_assignment; + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + + if (this != &other) { + move_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_move_assign(std::move(other), tbb::detail::disjunction<is_equal_type, pocma_type>()); + } + return *this; + } + + void swap( segment_table& other ) + noexcept(derived_type::is_noexcept_swap) + { + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + using pocs_type = typename segment_table_allocator_traits::propagate_on_container_swap; + + if (this != &other) { + swap_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_swap(other, tbb::detail::disjunction<is_equal_type, pocs_type>()); + } + } + + segment_type get_segment( segment_index_type index ) const { + return get_table()[index] + segment_base(index); + } + + value_type& operator[]( size_type index ) { + return internal_subscript<true>(index); + } + + const value_type& operator[]( size_type index ) const { + return const_cast<segment_table*>(this)->internal_subscript<true>(index); + } + + const segment_table_allocator_type& get_allocator() const { + return my_segment_table_allocator; + } + + segment_table_allocator_type& get_allocator() { + return my_segment_table_allocator; + } + + void enable_segment( segment_type& segment, segment_table_type table, segment_index_type seg_index, size_type index ) { + // Allocate new segment + segment_type new_segment = self()->create_segment(table, seg_index, index); + if (new_segment != nullptr) { + // Store (new_segment - segment_base) into the segment table to allow access to the table by index via + // my_segment_table[segment_index_of(index)][index] + segment_type disabled_segment = nullptr; + if (!table[seg_index].compare_exchange_strong(disabled_segment, new_segment - segment_base(seg_index))) { + // compare_exchange failed => some other thread has already enabled this segment + // Deallocate the memory + self()->deallocate_segment(new_segment, seg_index); + } + } + + segment = table[seg_index].load(std::memory_order_acquire); + __TBB_ASSERT(segment != nullptr, "If create_segment returned nullptr, the element should be stored in the table"); + } + + void delete_segment( segment_index_type seg_index ) { + segment_type disabled_segment = nullptr; + // Set the pointer to the segment to NULL in the table + segment_type segment_to_delete = get_table()[seg_index].exchange(disabled_segment); + if (segment_to_delete == segment_allocation_failure_tag) { + return; + } + + segment_to_delete += segment_base(seg_index); + + // Deallocate the segment + self()->destroy_segment(segment_to_delete, seg_index); + } + + size_type number_of_segments( segment_table_type table ) 
const { + // Check for an active table, if it is embedded table - return the number of embedded segments + // Otherwise - return the maximum number of segments + return table == my_embedded_table ? pointers_per_embedded_table : pointers_per_long_table; + } + + size_type capacity() const noexcept { + segment_table_type table = get_table(); + size_type num_segments = number_of_segments(table); + for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { + // Check if the pointer is valid (allocated) + if (table[seg_index].load(std::memory_order_relaxed) <= segment_allocation_failure_tag) { + return segment_base(seg_index); + } + } + return segment_base(num_segments); + } + + size_type find_last_allocated_segment( segment_table_type table ) const noexcept { + size_type end = 0; + size_type num_segments = number_of_segments(table); + for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { + // Check if the pointer is valid (allocated) + if (table[seg_index].load(std::memory_order_relaxed) > segment_allocation_failure_tag) { + end = seg_index + 1; + } + } + return end; + } + + void reserve( size_type n ) { + if (n > allocator_traits_type::max_size(my_segment_table_allocator)) { + throw_exception(exception_id::reservation_length_error); + } + + size_type size = my_size.load(std::memory_order_relaxed); + segment_index_type start_seg_idx = size == 0 ? 0 : segment_index_of(size - 1) + 1; + for (segment_index_type seg_idx = start_seg_idx; segment_base(seg_idx) < n; ++seg_idx) { + size_type first_index = segment_base(seg_idx); + internal_subscript<true>(first_index); + } + } + + void clear() { + clear_segments(); + clear_table(); + my_size.store(0, std::memory_order_relaxed); + my_first_block.store(0, std::memory_order_relaxed); + } + + void clear_segments() { + segment_table_type current_segment_table = get_table(); + for (size_type i = number_of_segments(current_segment_table); i != 0; --i) { + if (current_segment_table[i - 1].load(std::memory_order_relaxed) != nullptr) { + // If the segment was enabled - disable and deallocate it + delete_segment(i - 1); + } + } + } + + void clear_table() { + segment_table_type current_segment_table = get_table(); + if (current_segment_table != my_embedded_table) { + // If the active table is not the embedded one - deallocate the active table + for (size_type i = 0; i != pointers_per_long_table; ++i) { + segment_table_allocator_traits::destroy(my_segment_table_allocator, ¤t_segment_table[i]); + } + + segment_table_allocator_traits::deallocate(my_segment_table_allocator, current_segment_table, pointers_per_long_table); + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + zero_table(my_embedded_table, pointers_per_embedded_table); + } + } + + void extend_table_if_necessary(segment_table_type& table, size_type start_index, size_type end_index) { + // extend_segment_table if an active table is an embedded table + // and the requested index is not in the embedded table + if (table == my_embedded_table && end_index > embedded_table_size) { + if (start_index <= embedded_table_size) { + try_call([&] { + table = self()->allocate_long_table(my_embedded_table, start_index); + // It is possible that the table was extended by the thread that allocated first_block. + // In this case it is necessary to re-read the current table. 
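// A minimal standalone sketch (not from the TBB sources) of the power-of-two segment layout that
// the helpers restored a few lines below (segment_index_of, segment_base, segment_size) implement:
// indices 0..1 share segment 0, and every later segment s covers indices [2^s, 2^(s+1)).
// The names seg_index_of/seg_base/seg_size are local stand-ins written against <cstddef> only.

#include <cassert>
#include <cstddef>

std::size_t seg_index_of(std::size_t i) {   // floor(log2(i | 1)), mirroring tbb::detail::log2(uintptr_t(index|1))
    std::size_t v = i | 1, r = 0;
    while (v >>= 1) ++r;
    return r;
}
std::size_t seg_base(std::size_t s) { return (std::size_t(1) << s) & ~std::size_t(1); }
std::size_t seg_size(std::size_t s) { return s == 0 ? 2 : std::size_t(1) << s; }

int main() {
    assert(seg_index_of(0) == 0 && seg_index_of(1) == 0);                  // segment 0: indices 0..1
    assert(seg_index_of(3) == 1 && seg_base(1) == 2 && seg_size(1) == 2);  // segment 1: indices 2..3
    assert(seg_index_of(5) == 2 && seg_base(2) == 4 && seg_size(2) == 4);  // segment 2: indices 4..7
    // enable_segment() stores (segment - segment_base(s)), so table[s][index] addresses the
    // element directly without subtracting the base on every access.
    return 0;
}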
+ + if (table) { + my_segment_table.store(table, std::memory_order_release); + } else { + table = my_segment_table.load(std::memory_order_acquire); + } + }).on_exception([&] { + my_segment_table_allocation_failed.store(true, std::memory_order_relaxed); + }); + } else { + atomic_backoff backoff; + do { + if (my_segment_table_allocation_failed.load(std::memory_order_relaxed)) { + throw_exception(exception_id::bad_alloc); + } + backoff.pause(); + table = my_segment_table.load(std::memory_order_acquire); + } while (table == my_embedded_table); + } + } + } + + // Return the segment where index is stored + static constexpr segment_index_type segment_index_of( size_type index ) { + return size_type(tbb::detail::log2(uintptr_t(index|1))); + } + + // Needed to calculate the offset in segment + static constexpr size_type segment_base( size_type index ) { + return size_type(1) << index & ~size_type(1); + } + + // Return size of the segment + static constexpr size_type segment_size( size_type index ) { + return index == 0 ? 2 : size_type(1) << index; + } + +private: + + derived_type* self() { + return static_cast<derived_type*>(this); + } + + struct copy_segment_body_type { + void operator()( segment_index_type index, segment_type from, segment_type to ) const { + my_instance.self()->copy_segment(index, from, to); + } + segment_table& my_instance; + }; + + struct move_segment_body_type { + void operator()( segment_index_type index, segment_type from, segment_type to ) const { + my_instance.self()->move_segment(index, from, to); + } + segment_table& my_instance; + }; + + // Transgers all segments from the other table + template <typename TransferBody> + void internal_transfer( const segment_table& other, TransferBody transfer_segment ) { + static_cast<derived_type*>(this)->destroy_elements(); + + assign_first_block_if_necessary(other.my_first_block.load(std::memory_order_relaxed)); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + + segment_table_type other_table = other.get_table(); + size_type end_segment_size = segment_size(other.find_last_allocated_segment(other_table)); + + // If an exception occurred in other, then the size may be greater than the size of the end segment. + size_type other_size = end_segment_size < other.my_size.load(std::memory_order_relaxed) ? + other.my_size.load(std::memory_order_relaxed) : end_segment_size; + other_size = my_segment_table_allocation_failed ? 
embedded_table_size : other_size; + + for (segment_index_type i = 0; segment_base(i) < other_size; ++i) { + // If the segment in other table is enabled - transfer it + if (other_table[i].load(std::memory_order_relaxed) == segment_allocation_failure_tag) + { + my_size = segment_base(i); + break; + } else if (other_table[i].load(std::memory_order_relaxed) != nullptr) { + internal_subscript<true>(segment_base(i)); + transfer_segment(i, other.get_table()[i].load(std::memory_order_relaxed) + segment_base(i), + get_table()[i].load(std::memory_order_relaxed) + segment_base(i)); + } + } + } + + // Moves the other segment table + // Only equal allocators are allowed + void internal_move( segment_table&& other ) { + // NOTE: allocators should be equal + clear(); + my_first_block.store(other.my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + // If an active table in other is embedded - restore all of the embedded segments + if (other.get_table() == other.my_embedded_table) { + for ( size_type i = 0; i != pointers_per_embedded_table; ++i ) { + segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); + my_embedded_table[i].store(other_segment, std::memory_order_relaxed); + other.my_embedded_table[i].store(nullptr, std::memory_order_relaxed); + } + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + } else { + my_segment_table.store(other.my_segment_table, std::memory_order_relaxed); + other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); + zero_table(other.my_embedded_table, pointers_per_embedded_table); + } + other.my_size.store(0, std::memory_order_relaxed); + } + + // Move construct the segment table with the allocator object + // if any instances of allocator_type are always equal + void internal_move_construct_with_allocator( segment_table&& other, const allocator_type&, + /*is_always_equal = */ std::true_type ) { + internal_move(std::move(other)); + } + + // Move construct the segment table with the allocator object + // if any instances of allocator_type are always equal + void internal_move_construct_with_allocator( segment_table&& other, const allocator_type& alloc, + /*is_always_equal = */ std::false_type ) { + if (other.my_segment_table_allocator == alloc) { + // If allocators are equal - restore pointers + internal_move(std::move(other)); + } else { + // If allocators are not equal - perform per element move with reallocation + try_call( [&] { + internal_transfer(other, move_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + } + + // Move assigns the segment table to other is any instances of allocator_type are always equal + // or propagate_on_container_move_assignment is true + void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::true_type ) { + internal_move(std::move(other)); + } + + // Move assigns the segment table to other is any instances of allocator_type are not always equal + // and propagate_on_container_move_assignment is false + void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::false_type ) { + if (my_segment_table_allocator == other.my_segment_table_allocator) { + // If allocators are equal - restore pointers + internal_move(std::move(other)); + } else { + // If allocators are not equal - perform per element move with reallocation + internal_transfer(other, 
move_segment_body_type{*this}); + } + } + + // Swaps two segment tables if any instances of allocator_type are always equal + // or propagate_on_container_swap is true + void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::true_type ) { + internal_swap_fields(other); + } + + // Swaps two segment tables if any instances of allocator_type are not always equal + // and propagate_on_container_swap is false + // According to the C++ standard, swapping of two containers with unequal allocators + // is an undefined behavior scenario + void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::false_type ) { + __TBB_ASSERT(my_segment_table_allocator == other.my_segment_table_allocator, + "Swapping with unequal allocators is not allowed"); + internal_swap_fields(other); + } + + void internal_swap_fields( segment_table& other ) { + // If an active table in either *this segment table or other is an embedded one - swaps the embedded tables + if (get_table() == my_embedded_table || + other.get_table() == other.my_embedded_table) { + + for (size_type i = 0; i != pointers_per_embedded_table; ++i) { + segment_type current_segment = my_embedded_table[i].load(std::memory_order_relaxed); + segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); + + my_embedded_table[i].store(other_segment, std::memory_order_relaxed); + other.my_embedded_table[i].store(current_segment, std::memory_order_relaxed); + } + } + + segment_table_type current_segment_table = get_table(); + segment_table_type other_segment_table = other.get_table(); + + // If an active table is an embedded one - + // store an active table in other to the embedded one from other + if (current_segment_table == my_embedded_table) { + other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); + } else { + // Otherwise - store it to the active segment table + other.my_segment_table.store(current_segment_table, std::memory_order_relaxed); + } + + // If an active table in other segment table is an embedded one - + // store an active table in other to the embedded one from *this + if (other_segment_table == other.my_embedded_table) { + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + } else { + // Otherwise - store it to the active segment table in other + my_segment_table.store(other_segment_table, std::memory_order_relaxed); + } + auto first_block = other.my_first_block.load(std::memory_order_relaxed); + other.my_first_block.store(my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_first_block.store(first_block, std::memory_order_relaxed); + + auto size = other.my_size.load(std::memory_order_relaxed); + other.my_size.store(my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_size.store(size, std::memory_order_relaxed); + } + +protected: + // A flag indicates that an exception was throws during segment allocations + const segment_type segment_allocation_failure_tag = reinterpret_cast<segment_type>(1); + static constexpr size_type embedded_table_size = segment_size(pointers_per_embedded_table); + + template <bool allow_out_of_range_access> + value_type& internal_subscript( size_type index ) { + segment_index_type seg_index = segment_index_of(index); + segment_table_type table = my_segment_table.load(std::memory_order_acquire); + segment_type segment = nullptr; + + if (allow_out_of_range_access) { + if (derived_type::allow_table_extending) { + extend_table_if_necessary(table, index, index + 
1); + } + + segment = table[seg_index].load(std::memory_order_acquire); + // If the required segment is disabled - enable it + if (segment == nullptr) { + enable_segment(segment, table, seg_index, index); + } + // Check if an exception was thrown during segment allocation + if (segment == segment_allocation_failure_tag) { + throw_exception(exception_id::bad_alloc); + } + } else { + segment = table[seg_index].load(std::memory_order_acquire); + } + __TBB_ASSERT(segment != nullptr, nullptr); + + return segment[index]; + } + + void assign_first_block_if_necessary(segment_index_type index) { + size_type zero = 0; + if (this->my_first_block.load(std::memory_order_relaxed) == zero) { + this->my_first_block.compare_exchange_strong(zero, index); + } + } + + void zero_table( segment_table_type table, size_type count ) { + for (size_type i = 0; i != count; ++i) { + table[i].store(nullptr, std::memory_order_relaxed); + } + } + + segment_table_type get_table() const { + return my_segment_table.load(std::memory_order_acquire); + } + + segment_table_allocator_type my_segment_table_allocator; + std::atomic<segment_table_type> my_segment_table; + atomic_segment my_embedded_table[pointers_per_embedded_table]; + // Number of segments in first block + std::atomic<size_type> my_first_block; + // Number of elements in table + std::atomic<size_type> my_size; + // Flag to indicate failed extend table + std::atomic<bool> my_segment_table_allocation_failed; +}; // class segment_table + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +#endif // __TBB_detail__segment_table_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h index 8a10a61e1a..d7c6258f4b 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h @@ -1,108 +1,108 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB__small_object_pool_H -#define __TBB__small_object_pool_H - -#include "_config.h" -#include "_assert.h" - -#include "../profiling.h" -#include <cstddef> -#include <cstdint> -#include <atomic> - -namespace tbb { -namespace detail { - -namespace d1 { -class small_object_pool { -protected: - small_object_pool() = default; -}; -struct execution_data; -} - -namespace r1 { -void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes, - const d1::execution_data& ed); -void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes); -void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes, - const d1::execution_data& ed); -void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes); -} - -namespace d1 { -class small_object_allocator { -public: - template <typename Type, typename... Args> - Type* new_object(execution_data& ed, Args&&... args) { - void* allocated_object = r1::allocate(m_pool, sizeof(Type), ed); - - auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); - return constructed_object; - } - - template <typename Type, typename... Args> - Type* new_object(Args&&... args) { - void* allocated_object = r1::allocate(m_pool, sizeof(Type)); - - auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); - return constructed_object; - } - - template <typename Type> - void delete_object(Type* object, const execution_data& ed) { - // Copy this since the it can be the member of the passed object and - // unintentionally destroyed when Type destructor is called below - small_object_allocator alloc = *this; - object->~Type(); - alloc.deallocate(object, ed); - } - - template <typename Type> - void delete_object(Type* object) { - // Copy this since the it can be the member of the passed object and - // unintentionally destroyed when Type destructor is called below - small_object_allocator alloc = *this; - object->~Type(); - alloc.deallocate(object); - } - - template <typename Type> - void deallocate(Type* ptr, const execution_data& ed) { - call_itt_task_notify(destroy, ptr); - - __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); - r1::deallocate(*m_pool, ptr, sizeof(Type), ed); - } - - template <typename Type> - void deallocate(Type* ptr) { - call_itt_task_notify(destroy, ptr); - - __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); - r1::deallocate(*m_pool, ptr, sizeof(Type)); - } -private: - small_object_pool* m_pool{}; -}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif /* __TBB__small_object_pool_H */ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB__small_object_pool_H +#define __TBB__small_object_pool_H + +#include "_config.h" +#include "_assert.h" + +#include "../profiling.h" +#include <cstddef> +#include <cstdint> +#include <atomic> + +namespace tbb { +namespace detail { + +namespace d1 { +class small_object_pool { +protected: + small_object_pool() = default; +}; +struct execution_data; +} + +namespace r1 { +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes, + const d1::execution_data& ed); +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes); +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes, + const d1::execution_data& ed); +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes); +} + +namespace d1 { +class small_object_allocator { +public: + template <typename Type, typename... Args> + Type* new_object(execution_data& ed, Args&&... args) { + void* allocated_object = r1::allocate(m_pool, sizeof(Type), ed); + + auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); + return constructed_object; + } + + template <typename Type, typename... Args> + Type* new_object(Args&&... args) { + void* allocated_object = r1::allocate(m_pool, sizeof(Type)); + + auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); + return constructed_object; + } + + template <typename Type> + void delete_object(Type* object, const execution_data& ed) { + // Copy this since the it can be the member of the passed object and + // unintentionally destroyed when Type destructor is called below + small_object_allocator alloc = *this; + object->~Type(); + alloc.deallocate(object, ed); + } + + template <typename Type> + void delete_object(Type* object) { + // Copy this since the it can be the member of the passed object and + // unintentionally destroyed when Type destructor is called below + small_object_allocator alloc = *this; + object->~Type(); + alloc.deallocate(object); + } + + template <typename Type> + void deallocate(Type* ptr, const execution_data& ed) { + call_itt_task_notify(destroy, ptr); + + __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); + r1::deallocate(*m_pool, ptr, sizeof(Type), ed); + } + + template <typename Type> + void deallocate(Type* ptr) { + call_itt_task_notify(destroy, ptr); + + __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); + r1::deallocate(*m_pool, ptr, sizeof(Type)); + } +private: + small_object_pool* m_pool{}; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__small_object_pool_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h index c06d5b5db0..a295f48ddb 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h @@ -1,78 +1,78 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm") -TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for") -TBB_STRING_RESOURCE(PARALLEL_FOR_EACH, "tbb_parallel_for_each") -TBB_STRING_RESOURCE(PARALLEL_INVOKE, "tbb_parallel_invoke") -TBB_STRING_RESOURCE(PARALLEL_REDUCE, "tbb_parallel_reduce") -TBB_STRING_RESOURCE(PARALLEL_SCAN, "tbb_parallel_scan") -TBB_STRING_RESOURCE(PARALLEL_SORT, "tbb_parallel_sort") -TBB_STRING_RESOURCE(PARALLEL_PIPELINE, "tbb_parallel_pipeline") -TBB_STRING_RESOURCE(CUSTOM_CTX, "tbb_custom") - -TBB_STRING_RESOURCE(FLOW_NULL, "null") -TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node") -TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node") -TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node") -TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)") -TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node") -TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node") -TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node") -TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node") -TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node") -TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node") -TBB_STRING_RESOURCE(FLOW_INPUT_NODE, "input_node") -TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node") -TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node") -TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node") -TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node") -TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9") -TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name") -TBB_STRING_RESOURCE(FLOW_BODY, "body") -TBB_STRING_RESOURCE(FLOW_GRAPH, "graph") -TBB_STRING_RESOURCE(FLOW_NODE, "node") -TBB_STRING_RESOURCE(FLOW_TASKS, "tbb_flow_graph") -TBB_STRING_RESOURCE(USER_EVENT, "user_event") - -#if __TBB_FLOW_TRACE_CODEPTR -TBB_STRING_RESOURCE(CODE_ADDRESS, "code_address") -#endif +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may 
not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm") +TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for") +TBB_STRING_RESOURCE(PARALLEL_FOR_EACH, "tbb_parallel_for_each") +TBB_STRING_RESOURCE(PARALLEL_INVOKE, "tbb_parallel_invoke") +TBB_STRING_RESOURCE(PARALLEL_REDUCE, "tbb_parallel_reduce") +TBB_STRING_RESOURCE(PARALLEL_SCAN, "tbb_parallel_scan") +TBB_STRING_RESOURCE(PARALLEL_SORT, "tbb_parallel_sort") +TBB_STRING_RESOURCE(PARALLEL_PIPELINE, "tbb_parallel_pipeline") +TBB_STRING_RESOURCE(CUSTOM_CTX, "tbb_custom") + +TBB_STRING_RESOURCE(FLOW_NULL, "null") +TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node") +TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node") +TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node") +TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)") +TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node") +TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node") +TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node") +TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node") +TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node") +TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node") +TBB_STRING_RESOURCE(FLOW_INPUT_NODE, "input_node") +TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node") +TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node") +TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node") +TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node") +TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9") +TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name") +TBB_STRING_RESOURCE(FLOW_BODY, "body") +TBB_STRING_RESOURCE(FLOW_GRAPH, 
"graph") +TBB_STRING_RESOURCE(FLOW_NODE, "node") +TBB_STRING_RESOURCE(FLOW_TASKS, "tbb_flow_graph") +TBB_STRING_RESOURCE(USER_EVENT, "user_event") + +#if __TBB_FLOW_TRACE_CODEPTR +TBB_STRING_RESOURCE(CODE_ADDRESS, "code_address") +#endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h index 7b4f8521c6..32a2d9c8e8 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h @@ -1,243 +1,243 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__task_H -#define __TBB__task_H - -#include "_config.h" -#include "_assert.h" -#include "_template_helpers.h" -#include "_small_object_pool.h" - -#include "../profiling.h" - -#include <cstddef> -#include <cstdint> -#include <climits> -#include <utility> -#include <atomic> -#include <mutex> - -namespace tbb { -namespace detail { - -namespace d1 { -using slot_id = unsigned short; -constexpr slot_id no_slot = slot_id(~0); -constexpr slot_id any_slot = slot_id(~1); - -class task; -class wait_context; -class task_group_context; -struct execution_data; -} - -namespace r1 { -//! Task spawn/wait entry points -void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx); -void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id); -void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); -void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); -d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); -d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); - -// Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms. -struct suspend_point_type; -using suspend_callback_type = void(*)(void*, suspend_point_type*); -//! The resumable tasks entry points -void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); -void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); -suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); -void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr); - -class thread_data; -class task_dispatcher; -class external_waiter; -struct task_accessor; -struct task_arena_impl; -} // namespace r1 - -namespace d1 { - -class task_arena; -using suspend_point = r1::suspend_point_type*; - -#if __TBB_RESUMABLE_TASKS -template <typename F> -static void suspend_callback(void* user_callback, suspend_point sp) { - // Copy user function to a new stack after the context switch to avoid a race when the previous - // suspend point is resumed while the user_callback is being called. 
- F user_callback_copy = *static_cast<F*>(user_callback); - user_callback_copy(sp); -} - -template <typename F> -void suspend(F f) { - r1::suspend(&suspend_callback<F>, &f); -} - -inline void resume(suspend_point tag) { - r1::resume(tag); -} -#endif /* __TBB_RESUMABLE_TASKS */ - -// TODO align wait_context on cache lane -class wait_context { - static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1); - - std::uint64_t m_version_and_traits{1}; - std::atomic<std::uint64_t> m_ref_count{}; - - void add_reference(std::int64_t delta) { - call_itt_task_notify(releasing, this); - std::uint64_t r = m_ref_count.fetch_add(delta) + delta; - - __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); - - if (!r) { - // Some external waiters or coroutine waiters sleep in wait list - // Should to notify them that work is done - std::uintptr_t wait_ctx_addr = std::uintptr_t(this); - r1::notify_waiters(wait_ctx_addr); - } - } - - bool continue_execution() const { - std::uint64_t r = m_ref_count.load(std::memory_order_acquire); - __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); - return r > 0; - } - - friend class r1::thread_data; - friend class r1::task_dispatcher; - friend class r1::external_waiter; - friend class task_group; - friend class task_group_base; - friend struct r1::task_arena_impl; - friend struct r1::suspend_point_type; -public: - // Despite the internal reference count is uin64_t we limit the user interface with uint32_t - // to preserve a part of the internal reference count for special needs. - wait_context(std::uint32_t ref_count) : m_ref_count{ref_count} { suppress_unused_warning(m_version_and_traits); } - wait_context(const wait_context&) = delete; - - ~wait_context() { - __TBB_ASSERT(!continue_execution(), NULL); - } - - void reserve(std::uint32_t delta = 1) { - add_reference(delta); - } - - void release(std::uint32_t delta = 1) { - add_reference(-std::int64_t(delta)); - } -#if __TBB_EXTRA_DEBUG - unsigned reference_count() const { - return unsigned(m_ref_count.load(std::memory_order_acquire)); - } -#endif -}; - -struct execution_data { - task_group_context* context{}; - slot_id original_slot{}; - slot_id affinity_slot{}; -}; - -inline task_group_context* context(const execution_data& ed) { - return ed.context; -} - -inline slot_id original_slot(const execution_data& ed) { - return ed.original_slot; -} - -inline slot_id affinity_slot(const execution_data& ed) { - return ed.affinity_slot; -} - -inline slot_id execution_slot(const execution_data& ed) { - return r1::execution_slot(&ed); -} - -inline bool is_same_affinity(const execution_data& ed) { - return affinity_slot(ed) == no_slot || affinity_slot(ed) == execution_slot(ed); -} - -inline bool is_stolen(const execution_data& ed) { - return original_slot(ed) != execution_slot(ed); -} - -inline void spawn(task& t, task_group_context& ctx) { - call_itt_task_notify(releasing, &t); - r1::spawn(t, ctx); -} - -inline void spawn(task& t, task_group_context& ctx, slot_id id) { - call_itt_task_notify(releasing, &t); - r1::spawn(t, ctx, id); -} - -inline void execute_and_wait(task& t, task_group_context& t_ctx, wait_context& wait_ctx, task_group_context& w_ctx) { - r1::execute_and_wait(t, t_ctx, wait_ctx, w_ctx); - call_itt_task_notify(acquired, &wait_ctx); - call_itt_task_notify(destroy, &wait_ctx); -} - -inline void wait(wait_context& wait_ctx, task_group_context& ctx) { - r1::wait(wait_ctx, ctx); - call_itt_task_notify(acquired, &wait_ctx); - call_itt_task_notify(destroy, &wait_ctx); -} - -using 
r1::current_context; - -class task_traits { - std::uint64_t m_version_and_traits{}; - friend struct r1::task_accessor; -}; - -//! Alignment for a task object -static constexpr std::size_t task_alignment = 64; - -//! Base class for user-defined tasks. -/** @ingroup task_scheduling */ - -class -#if __TBB_ALIGNAS_AVAILABLE -alignas(task_alignment) -#endif -task : public task_traits { -protected: - virtual ~task() = default; - -public: - virtual task* execute(execution_data&) = 0; - virtual task* cancel(execution_data&) = 0; - -private: - std::uint64_t m_reserved[6]{}; - friend struct r1::task_accessor; -}; -#if __TBB_ALIGNAS_AVAILABLE -static_assert(sizeof(task) == task_alignment, "task size is broken"); -#endif -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif /* __TBB__task_H */ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__task_H +#define __TBB__task_H + +#include "_config.h" +#include "_assert.h" +#include "_template_helpers.h" +#include "_small_object_pool.h" + +#include "../profiling.h" + +#include <cstddef> +#include <cstdint> +#include <climits> +#include <utility> +#include <atomic> +#include <mutex> + +namespace tbb { +namespace detail { + +namespace d1 { +using slot_id = unsigned short; +constexpr slot_id no_slot = slot_id(~0); +constexpr slot_id any_slot = slot_id(~1); + +class task; +class wait_context; +class task_group_context; +struct execution_data; +} + +namespace r1 { +//! Task spawn/wait entry points +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx); +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id); +void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); +void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); +d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); +d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); + +// Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms. +struct suspend_point_type; +using suspend_callback_type = void(*)(void*, suspend_point_type*); +//! 
The resumable tasks entry points +void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); +void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); +suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); +void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr); + +class thread_data; +class task_dispatcher; +class external_waiter; +struct task_accessor; +struct task_arena_impl; +} // namespace r1 + +namespace d1 { + +class task_arena; +using suspend_point = r1::suspend_point_type*; + +#if __TBB_RESUMABLE_TASKS +template <typename F> +static void suspend_callback(void* user_callback, suspend_point sp) { + // Copy user function to a new stack after the context switch to avoid a race when the previous + // suspend point is resumed while the user_callback is being called. + F user_callback_copy = *static_cast<F*>(user_callback); + user_callback_copy(sp); +} + +template <typename F> +void suspend(F f) { + r1::suspend(&suspend_callback<F>, &f); +} + +inline void resume(suspend_point tag) { + r1::resume(tag); +} +#endif /* __TBB_RESUMABLE_TASKS */ + +// TODO align wait_context on cache lane +class wait_context { + static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1); + + std::uint64_t m_version_and_traits{1}; + std::atomic<std::uint64_t> m_ref_count{}; + + void add_reference(std::int64_t delta) { + call_itt_task_notify(releasing, this); + std::uint64_t r = m_ref_count.fetch_add(delta) + delta; + + __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); + + if (!r) { + // Some external waiters or coroutine waiters sleep in wait list + // Should to notify them that work is done + std::uintptr_t wait_ctx_addr = std::uintptr_t(this); + r1::notify_waiters(wait_ctx_addr); + } + } + + bool continue_execution() const { + std::uint64_t r = m_ref_count.load(std::memory_order_acquire); + __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); + return r > 0; + } + + friend class r1::thread_data; + friend class r1::task_dispatcher; + friend class r1::external_waiter; + friend class task_group; + friend class task_group_base; + friend struct r1::task_arena_impl; + friend struct r1::suspend_point_type; +public: + // Despite the internal reference count is uin64_t we limit the user interface with uint32_t + // to preserve a part of the internal reference count for special needs. 
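// A hedged sketch (not from the TBB sources) of how this reference count gates a wait, pieced
// together from the declarations in this header: the waiter constructs wait_context with one
// reference per outstanding task, every task drops its reference when it is done, and d1::wait()
// returns once the count reaches zero (add_reference() calls r1::notify_waiters() on that transition).
// The task_group_context is assumed to come from the public headers; the supported way to get this
// pattern is tbb::task_group, so treat the code below as illustration only.
struct counted_task : tbb::detail::d1::task {
    tbb::detail::d1::wait_context& m_wait;
    explicit counted_task(tbb::detail::d1::wait_context& w) : m_wait(w) {}
    tbb::detail::d1::task* execute(tbb::detail::d1::execution_data&) override {
        // ... user work goes here ...
        m_wait.release();                    // drop the reference reserved for this task
        return nullptr;
    }
    tbb::detail::d1::task* cancel(tbb::detail::d1::execution_data&) override {
        m_wait.release();                    // a cancelled task still has to release its reference
        return nullptr;
    }
};

void run_two_and_wait(tbb::detail::d1::task_group_context& ctx) {
    tbb::detail::d1::wait_context wait_ctx{2};   // two outstanding tasks
    counted_task a{wait_ctx}, b{wait_ctx};
    tbb::detail::d1::spawn(a, ctx);
    tbb::detail::d1::spawn(b, ctx);
    tbb::detail::d1::wait(wait_ctx, ctx);        // returns only after both tasks call release()
}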
+ wait_context(std::uint32_t ref_count) : m_ref_count{ref_count} { suppress_unused_warning(m_version_and_traits); } + wait_context(const wait_context&) = delete; + + ~wait_context() { + __TBB_ASSERT(!continue_execution(), NULL); + } + + void reserve(std::uint32_t delta = 1) { + add_reference(delta); + } + + void release(std::uint32_t delta = 1) { + add_reference(-std::int64_t(delta)); + } +#if __TBB_EXTRA_DEBUG + unsigned reference_count() const { + return unsigned(m_ref_count.load(std::memory_order_acquire)); + } +#endif +}; + +struct execution_data { + task_group_context* context{}; + slot_id original_slot{}; + slot_id affinity_slot{}; +}; + +inline task_group_context* context(const execution_data& ed) { + return ed.context; +} + +inline slot_id original_slot(const execution_data& ed) { + return ed.original_slot; +} + +inline slot_id affinity_slot(const execution_data& ed) { + return ed.affinity_slot; +} + +inline slot_id execution_slot(const execution_data& ed) { + return r1::execution_slot(&ed); +} + +inline bool is_same_affinity(const execution_data& ed) { + return affinity_slot(ed) == no_slot || affinity_slot(ed) == execution_slot(ed); +} + +inline bool is_stolen(const execution_data& ed) { + return original_slot(ed) != execution_slot(ed); +} + +inline void spawn(task& t, task_group_context& ctx) { + call_itt_task_notify(releasing, &t); + r1::spawn(t, ctx); +} + +inline void spawn(task& t, task_group_context& ctx, slot_id id) { + call_itt_task_notify(releasing, &t); + r1::spawn(t, ctx, id); +} + +inline void execute_and_wait(task& t, task_group_context& t_ctx, wait_context& wait_ctx, task_group_context& w_ctx) { + r1::execute_and_wait(t, t_ctx, wait_ctx, w_ctx); + call_itt_task_notify(acquired, &wait_ctx); + call_itt_task_notify(destroy, &wait_ctx); +} + +inline void wait(wait_context& wait_ctx, task_group_context& ctx) { + r1::wait(wait_ctx, ctx); + call_itt_task_notify(acquired, &wait_ctx); + call_itt_task_notify(destroy, &wait_ctx); +} + +using r1::current_context; + +class task_traits { + std::uint64_t m_version_and_traits{}; + friend struct r1::task_accessor; +}; + +//! Alignment for a task object +static constexpr std::size_t task_alignment = 64; + +//! Base class for user-defined tasks. +/** @ingroup task_scheduling */ + +class +#if __TBB_ALIGNAS_AVAILABLE +alignas(task_alignment) +#endif +task : public task_traits { +protected: + virtual ~task() = default; + +public: + virtual task* execute(execution_data&) = 0; + virtual task* cancel(execution_data&) = 0; + +private: + std::uint64_t m_reserved[6]{}; + friend struct r1::task_accessor; +}; +#if __TBB_ALIGNAS_AVAILABLE +static_assert(sizeof(task) == task_alignment, "task size is broken"); +#endif +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__task_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h index 45a8ffede6..e973bc3128 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h @@ -1,394 +1,394 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__template_helpers_H -#define __TBB_detail__template_helpers_H - -#include "_utils.h" -#include "_config.h" - -#include <cstddef> -#include <cstdint> - -#include <type_traits> -#include <memory> -#include <iterator> - -namespace tbb { -namespace detail { -inline namespace d0 { - -// An internal implementation of void_t, which can be used in SFINAE contexts -template <typename...> -struct void_impl { - using type = void; -}; // struct void_impl - -template <typename... Args> -using void_t = typename void_impl<Args...>::type; - -// Generic SFINAE helper for expression checks, based on the idea demonstrated in ISO C++ paper n4502 -template <typename T, typename, template <typename> class... Checks> -struct supports_impl { - using type = std::false_type; -}; - -template <typename T, template <typename> class... Checks> -struct supports_impl<T, void_t<Checks<T>...>, Checks...> { - using type = std::true_type; -}; - -template <typename T, template <typename> class... Checks> -using supports = typename supports_impl<T, void, Checks...>::type; - -//! A template to select either 32-bit or 64-bit constant as compile time, depending on machine word size. -template <unsigned u, unsigned long long ull > -struct select_size_t_constant { - // Explicit cast is needed to avoid compiler warnings about possible truncation. - // The value of the right size, which is selected by ?:, is anyway not truncated or promoted. - static const std::size_t value = (std::size_t)((sizeof(std::size_t)==sizeof(u)) ? u : ull); -}; - -// TODO: do we really need it? -//! Cast between unrelated pointer types. -/** This method should be used sparingly as a last resort for dealing with - situations that inherently break strict ISO C++ aliasing rules. */ -// T is a pointer type because it will be explicitly provided by the programmer as a template argument; -// U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process. -template<typename T, typename U> -inline T punned_cast( U* ptr ) { - std::uintptr_t x = reinterpret_cast<std::uintptr_t>(ptr); - return reinterpret_cast<T>(x); -} - -template<class T, size_t S, size_t R> -struct padded_base : T { - char pad[S - R]; -}; -template<class T, size_t S> struct padded_base<T, S, 0> : T {}; - -//! Pads type T to fill out to a multiple of cache line size. -template<class T, size_t S = max_nfs_size> -struct padded : padded_base<T, S, sizeof(T) % S> {}; - -#if __TBB_CPP14_INTEGER_SEQUENCE_PRESENT - -using std::index_sequence; -using std::make_index_sequence; - -#else - -template<std::size_t... S> class index_sequence {}; - -template<std::size_t N, std::size_t... S> -struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... > {}; - -template<std::size_t... 
S> -struct make_index_sequence_impl <0, S...> { - using type = index_sequence<S...>; -}; - -template<std::size_t N> -using make_index_sequence = typename make_index_sequence_impl<N>::type; - -#endif /* __TBB_CPP14_INTEGER_SEQUENCE_PRESENT */ - -#if __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT -using std::conjunction; -using std::disjunction; -#else // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT - -template <typename...> -struct conjunction : std::true_type {}; - -template <typename First, typename... Args> -struct conjunction<First, Args...> - : std::conditional<bool(First::value), conjunction<Args...>, First>::type {}; - -template <typename T> -struct conjunction<T> : T {}; - -template <typename...> -struct disjunction : std::false_type {}; - -template <typename First, typename... Args> -struct disjunction<First, Args...> - : std::conditional<bool(First::value), First, disjunction<Args...>>::type {}; - -template <typename T> -struct disjunction<T> : T {}; - -#endif // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT - -template <typename Iterator> -using iterator_value_t = typename std::iterator_traits<Iterator>::value_type; - -template <typename Iterator> -using iterator_key_t = typename std::remove_const<typename iterator_value_t<Iterator>::first_type>::type; - -template <typename Iterator> -using iterator_mapped_t = typename iterator_value_t<Iterator>::second_type; - -template <typename Iterator> -using iterator_alloc_pair_t = std::pair<typename std::add_const<iterator_key_t<Iterator>>::type, - iterator_mapped_t<Iterator>>; - -template <typename A> using alloc_value_type = typename A::value_type; -template <typename A> using alloc_ptr_t = typename std::allocator_traits<A>::pointer; -template <typename A> using has_allocate = decltype(std::declval<alloc_ptr_t<A>&>() = std::declval<A>().allocate(0)); -template <typename A> using has_deallocate = decltype(std::declval<A>().deallocate(std::declval<alloc_ptr_t<A>>(), 0)); - -// alloc_value_type should be checked first, because it can be used in other checks -template <typename T> -using is_allocator = supports<T, alloc_value_type, has_allocate, has_deallocate>; - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename T> -inline constexpr bool is_allocator_v = is_allocator<T>::value; -#endif - -// Template class in which the "type" determines the type of the element number N in pack Args -template <std::size_t N, typename... Args> -struct pack_element { - using type = void; -}; - -template <std::size_t N, typename T, typename... Args> -struct pack_element<N, T, Args...> { - using type = typename pack_element<N-1, Args...>::type; -}; - -template <typename T, typename... Args> -struct pack_element<0, T, Args...> { - using type = T; -}; - -template <std::size_t N, typename... 
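// Illustrative sketch only: what the iterator/allocator trait aliases above compute
// for a std::map iterator and for std::allocator. Only standard types are used.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>

namespace demo {
namespace d0 = tbb::detail::d0;
using map_iter = std::map<std::string, int>::iterator;

static_assert(std::is_same<d0::iterator_key_t<map_iter>, std::string>::value,
              "const stripped from the key type");
static_assert(std::is_same<d0::iterator_mapped_t<map_iter>, int>::value,
              "mapped type");
static_assert(std::is_same<d0::iterator_alloc_pair_t<map_iter>,
                           std::pair<const std::string, int>>::value,
              "value type used for allocation");
static_assert(d0::is_allocator<std::allocator<int>>::value,
              "std::allocator passes the allocate/deallocate expression checks");
} // namespace demo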
Args> -using pack_element_t = typename pack_element<N, Args...>::type; - -template <typename Func> -class raii_guard { -public: - raii_guard( Func f ) : my_func(f), is_active(true) {} - - ~raii_guard() { - if (is_active) { - my_func(); - } - } - - void dismiss() { - is_active = false; - } -private: - Func my_func; - bool is_active; -}; // class raii_guard - -template <typename Func> -raii_guard<Func> make_raii_guard( Func f ) { - return raii_guard<Func>(f); -} - -template <typename Body> -struct try_call_proxy { - try_call_proxy( Body b ) : body(b) {} - - template <typename OnExceptionBody> - void on_exception( OnExceptionBody on_exception_body ) { - auto guard = make_raii_guard(on_exception_body); - body(); - guard.dismiss(); - } - - template <typename OnCompletionBody> - void on_completion(OnCompletionBody on_completion_body) { - auto guard = make_raii_guard(on_completion_body); - body(); - } - - Body body; -}; // struct try_call_proxy - -// Template helper function for API -// try_call(lambda1).on_exception(lambda2) -// Executes lambda1 and if it throws an exception - executes lambda2 -template <typename Body> -try_call_proxy<Body> try_call( Body b ) { - return try_call_proxy<Body>(b); -} - -#if __TBB_CPP17_IS_SWAPPABLE_PRESENT -using std::is_nothrow_swappable; -using std::is_swappable; -#else // __TBB_CPP17_IS_SWAPPABLE_PRESENT -namespace is_swappable_detail { -using std::swap; - -template <typename T> -using has_swap = decltype(swap(std::declval<T&>(), std::declval<T&>())); - -#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER -// Workaround for VS2015: it fails to instantiate noexcept(...) inside std::integral_constant. -template <typename T> -struct noexcept_wrapper { - static const bool value = noexcept(swap(std::declval<T&>(), std::declval<T&>())); -}; -template <typename T> -struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept_wrapper<T>::value> {}; -#else -template <typename T> -struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept(swap(std::declval<T&>(), std::declval<T&>()))> {}; -#endif -} - -template <typename T> -struct is_swappable : supports<T, is_swappable_detail::has_swap> {}; - -template <typename T> -struct is_nothrow_swappable - : conjunction<is_swappable<T>, is_swappable_detail::is_nothrow_swappable_impl<T>> {}; -#endif // __TBB_CPP17_IS_SWAPPABLE_PRESENT - -//! Allows to store a function parameter pack as a variable and later pass it to another function -template< typename... Types > -struct stored_pack; - -template<> -struct stored_pack<> -{ - using pack_type = stored_pack<>; - stored_pack() {} - - // Friend front-end functions - template< typename F, typename Pack > friend void call(F&& f, Pack&& p); - template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); - -protected: - // Ideally, ref-qualified non-static methods would be used, - // but that would greatly reduce the set of compilers where it works. - template< typename Ret, typename F, typename... Preceding > - static Ret call(F&& f, const pack_type& /*pack*/, Preceding&&... params) { - return std::forward<F>(f)(std::forward<Preceding>(params)...); - } - template< typename Ret, typename F, typename... Preceding > - static Ret call(F&& f, pack_type&& /*pack*/, Preceding&&... params) { - return std::forward<F>(f)(std::forward<Preceding>(params)...); - } -}; - -template< typename T, typename... 
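// Illustrative sketch only: the try_call(...).on_exception(...) helper defined above,
// used to run a rollback action when the body throws. copy_with_rollback is invented.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <vector>

namespace demo {
inline void copy_with_rollback(std::vector<int>& dst, const std::vector<int>& src) {
    auto rollback = [&dst] { dst.clear(); };          // runs only if the body throws
    tbb::detail::d0::try_call([&] {
        dst.assign(src.begin(), src.end());           // may throw, e.g. std::bad_alloc
    }).on_exception(rollback);                        // the exception still propagates to the caller
}
} // namespace demo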
Types > -struct stored_pack<T, Types...> : stored_pack<Types...> -{ - using pack_type = stored_pack<T, Types...>; - using pack_remainder = stored_pack<Types...>; - - // Since lifetime of original values is out of control, copies should be made. - // Thus references should be stripped away from the deduced type. - typename std::decay<T>::type leftmost_value; - - // Here rvalue references act in the same way as forwarding references, - // as long as class template parameters were deduced via forwarding references. - stored_pack(T&& t, Types&&... types) - : pack_remainder(std::forward<Types>(types)...), leftmost_value(std::forward<T>(t)) {} - - // Friend front-end functions - template< typename F, typename Pack > friend void call(F&& f, Pack&& p); - template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); - -protected: - template< typename Ret, typename F, typename... Preceding > - static Ret call(F&& f, pack_type& pack, Preceding&&... params) { - return pack_remainder::template call<Ret>( - std::forward<F>(f), static_cast<pack_remainder&>(pack), - std::forward<Preceding>(params)... , pack.leftmost_value - ); - } - - template< typename Ret, typename F, typename... Preceding > - static Ret call(F&& f, pack_type&& pack, Preceding&&... params) { - return pack_remainder::template call<Ret>( - std::forward<F>(f), static_cast<pack_remainder&&>(pack), - std::forward<Preceding>(params)... , std::move(pack.leftmost_value) - ); - } -}; - -//! Calls the given function with arguments taken from a stored_pack -template< typename F, typename Pack > -void call(F&& f, Pack&& p) { - std::decay<Pack>::type::template call<void>(std::forward<F>(f), std::forward<Pack>(p)); -} - -template< typename Ret, typename F, typename Pack > -Ret call_and_return(F&& f, Pack&& p) { - return std::decay<Pack>::type::template call<Ret>(std::forward<F>(f), std::forward<Pack>(p)); -} - -template< typename... Types > -stored_pack<Types...> save_pack(Types&&... types) { - return stored_pack<Types...>(std::forward<Types>(types)...); -} - -// A structure with the value which is equal to Trait::value -// but can be used in the immediate context due to parameter T -template <typename Trait, typename T> -struct dependent_bool : std::integral_constant<bool, bool(Trait::value)> {}; - -template <typename Callable> -struct body_arg_detector; - -template <typename Callable, typename ReturnType, typename Arg> -struct body_arg_detector<ReturnType(Callable::*)(Arg)> { - using arg_type = Arg; -}; - -template <typename Callable, typename ReturnType, typename Arg> -struct body_arg_detector<ReturnType(Callable::*)(Arg) const> { - using arg_type = Arg; -}; - -template <typename Callable> -struct argument_detector; - -template <typename Callable> -struct argument_detector { - using type = typename body_arg_detector<decltype(&Callable::operator())>::arg_type; -}; - -template <typename ReturnType, typename Arg> -struct argument_detector<ReturnType(*)(Arg)> { - using type = Arg; -}; - -// Detects the argument type of callable, works for callable with one argument. 
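// Illustrative sketch only: save_pack() copies a set of arguments into a stored_pack,
// and call() later forwards them, in order, to a callable. print_sum is invented.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <iostream>
#include <utility>

namespace demo {
inline void print_sum(int a, int b) { std::cout << a + b << '\n'; }

inline void deferred_invocation() {
    auto pack = tbb::detail::d0::save_pack(2, 3);        // stored_pack<int, int>, values copied
    tbb::detail::d0::call(print_sum, std::move(pack));   // invokes print_sum(2, 3), printing 5
}
} // namespace demo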
-template <typename Callable> -using argument_type_of = typename argument_detector<typename std::decay<Callable>::type>::type; - -template <typename T> -struct type_identity { - using type = T; -}; - -template <typename T> -using type_identity_t = typename type_identity<T>::type; - -} // inline namespace d0 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__template_helpers_H - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__template_helpers_H +#define __TBB_detail__template_helpers_H + +#include "_utils.h" +#include "_config.h" + +#include <cstddef> +#include <cstdint> + +#include <type_traits> +#include <memory> +#include <iterator> + +namespace tbb { +namespace detail { +inline namespace d0 { + +// An internal implementation of void_t, which can be used in SFINAE contexts +template <typename...> +struct void_impl { + using type = void; +}; // struct void_impl + +template <typename... Args> +using void_t = typename void_impl<Args...>::type; + +// Generic SFINAE helper for expression checks, based on the idea demonstrated in ISO C++ paper n4502 +template <typename T, typename, template <typename> class... Checks> +struct supports_impl { + using type = std::false_type; +}; + +template <typename T, template <typename> class... Checks> +struct supports_impl<T, void_t<Checks<T>...>, Checks...> { + using type = std::true_type; +}; + +template <typename T, template <typename> class... Checks> +using supports = typename supports_impl<T, void, Checks...>::type; + +//! A template to select either 32-bit or 64-bit constant as compile time, depending on machine word size. +template <unsigned u, unsigned long long ull > +struct select_size_t_constant { + // Explicit cast is needed to avoid compiler warnings about possible truncation. + // The value of the right size, which is selected by ?:, is anyway not truncated or promoted. + static const std::size_t value = (std::size_t)((sizeof(std::size_t)==sizeof(u)) ? u : ull); +}; + +// TODO: do we really need it? +//! Cast between unrelated pointer types. +/** This method should be used sparingly as a last resort for dealing with + situations that inherently break strict ISO C++ aliasing rules. */ +// T is a pointer type because it will be explicitly provided by the programmer as a template argument; +// U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process. +template<typename T, typename U> +inline T punned_cast( U* ptr ) { + std::uintptr_t x = reinterpret_cast<std::uintptr_t>(ptr); + return reinterpret_cast<T>(x); +} + +template<class T, size_t S, size_t R> +struct padded_base : T { + char pad[S - R]; +}; +template<class T, size_t S> struct padded_base<T, S, 0> : T {}; + +//! Pads type T to fill out to a multiple of cache line size. 
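// Illustrative sketch only: argument_type_of<> reports the single parameter type of a
// callable (lambda or function pointer), which TBB uses to deduce body argument types.
// The lambda `body` is invented for this example.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <type_traits>

namespace demo {
auto body = [](const double& x) { return x * 2; };

static_assert(std::is_same<tbb::detail::d0::argument_type_of<decltype(body)>,
                           const double&>::value, "lambda parameter type");
static_assert(std::is_same<tbb::detail::d0::argument_type_of<void(*)(int)>,
                           int>::value, "function pointer parameter type");
} // namespace demo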
+template<class T, size_t S = max_nfs_size> +struct padded : padded_base<T, S, sizeof(T) % S> {}; + +#if __TBB_CPP14_INTEGER_SEQUENCE_PRESENT + +using std::index_sequence; +using std::make_index_sequence; + +#else + +template<std::size_t... S> class index_sequence {}; + +template<std::size_t N, std::size_t... S> +struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... > {}; + +template<std::size_t... S> +struct make_index_sequence_impl <0, S...> { + using type = index_sequence<S...>; +}; + +template<std::size_t N> +using make_index_sequence = typename make_index_sequence_impl<N>::type; + +#endif /* __TBB_CPP14_INTEGER_SEQUENCE_PRESENT */ + +#if __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT +using std::conjunction; +using std::disjunction; +#else // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT + +template <typename...> +struct conjunction : std::true_type {}; + +template <typename First, typename... Args> +struct conjunction<First, Args...> + : std::conditional<bool(First::value), conjunction<Args...>, First>::type {}; + +template <typename T> +struct conjunction<T> : T {}; + +template <typename...> +struct disjunction : std::false_type {}; + +template <typename First, typename... Args> +struct disjunction<First, Args...> + : std::conditional<bool(First::value), First, disjunction<Args...>>::type {}; + +template <typename T> +struct disjunction<T> : T {}; + +#endif // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT + +template <typename Iterator> +using iterator_value_t = typename std::iterator_traits<Iterator>::value_type; + +template <typename Iterator> +using iterator_key_t = typename std::remove_const<typename iterator_value_t<Iterator>::first_type>::type; + +template <typename Iterator> +using iterator_mapped_t = typename iterator_value_t<Iterator>::second_type; + +template <typename Iterator> +using iterator_alloc_pair_t = std::pair<typename std::add_const<iterator_key_t<Iterator>>::type, + iterator_mapped_t<Iterator>>; + +template <typename A> using alloc_value_type = typename A::value_type; +template <typename A> using alloc_ptr_t = typename std::allocator_traits<A>::pointer; +template <typename A> using has_allocate = decltype(std::declval<alloc_ptr_t<A>&>() = std::declval<A>().allocate(0)); +template <typename A> using has_deallocate = decltype(std::declval<A>().deallocate(std::declval<alloc_ptr_t<A>>(), 0)); + +// alloc_value_type should be checked first, because it can be used in other checks +template <typename T> +using is_allocator = supports<T, alloc_value_type, has_allocate, has_deallocate>; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename T> +inline constexpr bool is_allocator_v = is_allocator<T>::value; +#endif + +// Template class in which the "type" determines the type of the element number N in pack Args +template <std::size_t N, typename... Args> +struct pack_element { + using type = void; +}; + +template <std::size_t N, typename T, typename... Args> +struct pack_element<N, T, Args...> { + using type = typename pack_element<N-1, Args...>::type; +}; + +template <typename T, typename... Args> +struct pack_element<0, T, Args...> { + using type = T; +}; + +template <std::size_t N, typename... 
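// Illustrative sketch only: padded<> rounds a type up to a multiple of max_nfs_size (128),
// so adjacent elements land on different cache lines. slot_counter is an invented type.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <atomic>

namespace demo {
struct slot_counter { std::atomic<long> hits{0}; };

static_assert(sizeof(tbb::detail::d0::padded<slot_counter>) % tbb::detail::d0::max_nfs_size == 0,
              "size rounded up to a multiple of the cache-line upper bound");

// e.g. per-slot counters that avoid false sharing:
// tbb::detail::d0::padded<slot_counter> counters[8];
} // namespace demo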
Args> +using pack_element_t = typename pack_element<N, Args...>::type; + +template <typename Func> +class raii_guard { +public: + raii_guard( Func f ) : my_func(f), is_active(true) {} + + ~raii_guard() { + if (is_active) { + my_func(); + } + } + + void dismiss() { + is_active = false; + } +private: + Func my_func; + bool is_active; +}; // class raii_guard + +template <typename Func> +raii_guard<Func> make_raii_guard( Func f ) { + return raii_guard<Func>(f); +} + +template <typename Body> +struct try_call_proxy { + try_call_proxy( Body b ) : body(b) {} + + template <typename OnExceptionBody> + void on_exception( OnExceptionBody on_exception_body ) { + auto guard = make_raii_guard(on_exception_body); + body(); + guard.dismiss(); + } + + template <typename OnCompletionBody> + void on_completion(OnCompletionBody on_completion_body) { + auto guard = make_raii_guard(on_completion_body); + body(); + } + + Body body; +}; // struct try_call_proxy + +// Template helper function for API +// try_call(lambda1).on_exception(lambda2) +// Executes lambda1 and if it throws an exception - executes lambda2 +template <typename Body> +try_call_proxy<Body> try_call( Body b ) { + return try_call_proxy<Body>(b); +} + +#if __TBB_CPP17_IS_SWAPPABLE_PRESENT +using std::is_nothrow_swappable; +using std::is_swappable; +#else // __TBB_CPP17_IS_SWAPPABLE_PRESENT +namespace is_swappable_detail { +using std::swap; + +template <typename T> +using has_swap = decltype(swap(std::declval<T&>(), std::declval<T&>())); + +#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER +// Workaround for VS2015: it fails to instantiate noexcept(...) inside std::integral_constant. +template <typename T> +struct noexcept_wrapper { + static const bool value = noexcept(swap(std::declval<T&>(), std::declval<T&>())); +}; +template <typename T> +struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept_wrapper<T>::value> {}; +#else +template <typename T> +struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept(swap(std::declval<T&>(), std::declval<T&>()))> {}; +#endif +} + +template <typename T> +struct is_swappable : supports<T, is_swappable_detail::has_swap> {}; + +template <typename T> +struct is_nothrow_swappable + : conjunction<is_swappable<T>, is_swappable_detail::is_nothrow_swappable_impl<T>> {}; +#endif // __TBB_CPP17_IS_SWAPPABLE_PRESENT + +//! Allows to store a function parameter pack as a variable and later pass it to another function +template< typename... Types > +struct stored_pack; + +template<> +struct stored_pack<> +{ + using pack_type = stored_pack<>; + stored_pack() {} + + // Friend front-end functions + template< typename F, typename Pack > friend void call(F&& f, Pack&& p); + template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); + +protected: + // Ideally, ref-qualified non-static methods would be used, + // but that would greatly reduce the set of compilers where it works. + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, const pack_type& /*pack*/, Preceding&&... params) { + return std::forward<F>(f)(std::forward<Preceding>(params)...); + } + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type&& /*pack*/, Preceding&&... params) { + return std::forward<F>(f)(std::forward<Preceding>(params)...); + } +}; + +template< typename T, typename... 
Types > +struct stored_pack<T, Types...> : stored_pack<Types...> +{ + using pack_type = stored_pack<T, Types...>; + using pack_remainder = stored_pack<Types...>; + + // Since lifetime of original values is out of control, copies should be made. + // Thus references should be stripped away from the deduced type. + typename std::decay<T>::type leftmost_value; + + // Here rvalue references act in the same way as forwarding references, + // as long as class template parameters were deduced via forwarding references. + stored_pack(T&& t, Types&&... types) + : pack_remainder(std::forward<Types>(types)...), leftmost_value(std::forward<T>(t)) {} + + // Friend front-end functions + template< typename F, typename Pack > friend void call(F&& f, Pack&& p); + template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); + +protected: + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type& pack, Preceding&&... params) { + return pack_remainder::template call<Ret>( + std::forward<F>(f), static_cast<pack_remainder&>(pack), + std::forward<Preceding>(params)... , pack.leftmost_value + ); + } + + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type&& pack, Preceding&&... params) { + return pack_remainder::template call<Ret>( + std::forward<F>(f), static_cast<pack_remainder&&>(pack), + std::forward<Preceding>(params)... , std::move(pack.leftmost_value) + ); + } +}; + +//! Calls the given function with arguments taken from a stored_pack +template< typename F, typename Pack > +void call(F&& f, Pack&& p) { + std::decay<Pack>::type::template call<void>(std::forward<F>(f), std::forward<Pack>(p)); +} + +template< typename Ret, typename F, typename Pack > +Ret call_and_return(F&& f, Pack&& p) { + return std::decay<Pack>::type::template call<Ret>(std::forward<F>(f), std::forward<Pack>(p)); +} + +template< typename... Types > +stored_pack<Types...> save_pack(Types&&... types) { + return stored_pack<Types...>(std::forward<Types>(types)...); +} + +// A structure with the value which is equal to Trait::value +// but can be used in the immediate context due to parameter T +template <typename Trait, typename T> +struct dependent_bool : std::integral_constant<bool, bool(Trait::value)> {}; + +template <typename Callable> +struct body_arg_detector; + +template <typename Callable, typename ReturnType, typename Arg> +struct body_arg_detector<ReturnType(Callable::*)(Arg)> { + using arg_type = Arg; +}; + +template <typename Callable, typename ReturnType, typename Arg> +struct body_arg_detector<ReturnType(Callable::*)(Arg) const> { + using arg_type = Arg; +}; + +template <typename Callable> +struct argument_detector; + +template <typename Callable> +struct argument_detector { + using type = typename body_arg_detector<decltype(&Callable::operator())>::arg_type; +}; + +template <typename ReturnType, typename Arg> +struct argument_detector<ReturnType(*)(Arg)> { + using type = Arg; +}; + +// Detects the argument type of callable, works for callable with one argument. 
+template <typename Callable> +using argument_type_of = typename argument_detector<typename std::decay<Callable>::type>::type; + +template <typename T> +struct type_identity { + using type = T; +}; + +template <typename T> +using type_identity_t = typename type_identity<T>::type; + +} // inline namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__template_helpers_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h index d1e02179f8..984bedd70d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h @@ -1,329 +1,329 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__utils_H -#define __TBB_detail__utils_H - -#include <type_traits> -#include <cstdint> -#include <atomic> - -#include "_config.h" -#include "_assert.h" -#include "_machine.h" - -namespace tbb { -namespace detail { -inline namespace d0 { - -//! Utility template function to prevent "unused" warnings by various compilers. -template<typename... T> void suppress_unused_warning(T&&...) {} - -//! Compile-time constant that is upper bound on cache line/sector size. -/** It should be used only in situations where having a compile-time upper - bound is more useful than a run-time exact answer. - @ingroup memory_allocation */ -constexpr size_t max_nfs_size = 128; - -//! Class that implements exponential backoff. -class atomic_backoff { - //! Time delay, in units of "pause" instructions. - /** Should be equal to approximately the number of "pause" instructions - that take the same time as an context switch. Must be a power of two.*/ - static constexpr std::int32_t LOOPS_BEFORE_YIELD = 16; - std::int32_t count; - -public: - // In many cases, an object of this type is initialized eagerly on hot path, - // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ } - // For this reason, the construction cost must be very small! - atomic_backoff() : count(1) {} - // This constructor pauses immediately; do not use on hot paths! - atomic_backoff(bool) : count(1) { pause(); } - - //! No Copy - atomic_backoff(const atomic_backoff&) = delete; - atomic_backoff& operator=(const atomic_backoff&) = delete; - - //! Pause for a while. - void pause() { - if (count <= LOOPS_BEFORE_YIELD) { - machine_pause(count); - // Pause twice as long the next time. - count *= 2; - } else { - // Pause is so long that we might as well yield CPU to scheduler. - yield(); - } - } - - //! Pause for a few times and return false if saturated. - bool bounded_pause() { - machine_pause(count); - if (count < LOOPS_BEFORE_YIELD) { - // Pause twice as long the next time. - count *= 2; - return true; - } else { - return false; - } - } - - void reset() { - count = 1; - } -}; - -//! Spin WHILE the condition is true. -/** T and U should be comparable types. 
*/ -template <typename T, typename C> -void spin_wait_while_condition(const std::atomic<T>& location, C comp) { - atomic_backoff backoff; - while (comp(location.load(std::memory_order_acquire))) { - backoff.pause(); - } -} - -//! Spin WHILE the value of the variable is equal to a given value -/** T and U should be comparable types. */ -template <typename T, typename U> -void spin_wait_while_eq(const std::atomic<T>& location, const U value) { - spin_wait_while_condition(location, [&value](T t) { return t == value; }); -} - -//! Spin UNTIL the value of the variable is equal to a given value -/** T and U should be comparable types. */ -template<typename T, typename U> -void spin_wait_until_eq(const std::atomic<T>& location, const U value) { - spin_wait_while_condition(location, [&value](T t) { return t != value; }); -} - -template <typename T> -std::uintptr_t log2(T in) { - __TBB_ASSERT(in > 0, "The logarithm of a non-positive value is undefined."); - return machine_log2(in); -} - -template<typename T> -T reverse_bits(T src) { - return machine_reverse_bits(src); -} - -template<typename T> -T reverse_n_bits(T src, std::size_t n) { - __TBB_ASSERT(n != 0, "Reverse for 0 bits is undefined behavior."); - return reverse_bits(src) >> (number_of_bits<T>() - n); -} - -// A function to check if passed integer is a power of two -template <typename IntegerType> -constexpr bool is_power_of_two( IntegerType arg ) { - static_assert(std::is_integral<IntegerType>::value, - "An argument for is_power_of_two should be integral type"); - return arg && (0 == (arg & (arg - 1))); -} - -// A function to determine if passed integer is a power of two -// at least as big as another power of two, i.e. for strictly positive i and j, -// with j being a power of two, determines whether i==j<<k for some nonnegative k -template <typename ArgIntegerType, typename DivisorIntegerType> -constexpr bool is_power_of_two_at_least(ArgIntegerType arg, DivisorIntegerType divisor) { - // Divisor should be a power of two - static_assert(std::is_integral<ArgIntegerType>::value, - "An argument for is_power_of_two_at_least should be integral type"); - return 0 == (arg & (arg - divisor)); -} - -// A function to compute arg modulo divisor where divisor is a power of 2. -template<typename ArgIntegerType, typename DivisorIntegerType> -inline ArgIntegerType modulo_power_of_two(ArgIntegerType arg, DivisorIntegerType divisor) { - __TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" ); - return arg & (divisor - 1); -} - -//! A function to check if passed in pointer is aligned on a specific border -template<typename T> -constexpr bool is_aligned(T* pointer, std::uintptr_t alignment) { - return 0 == ((std::uintptr_t)pointer & (alignment - 1)); -} - -#if TBB_USE_ASSERT -static void* const poisoned_ptr = reinterpret_cast<void*>(-1); - -//! Set p to invalid pointer value. -template<typename T> -inline void poison_pointer( T* &p ) { p = reinterpret_cast<T*>(poisoned_ptr); } - -template<typename T> -inline void poison_pointer(std::atomic<T*>& p) { p.store(reinterpret_cast<T*>(poisoned_ptr), std::memory_order_relaxed); } - -/** Expected to be used in assertions only, thus no empty form is defined. 
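// Illustrative sketch only: the spin-wait helpers above pause with exponential backoff
// (and eventually yield) until a condition holds. The flag and the thread are invented.
#include "oneapi/tbb/detail/_utils.h"
#include <atomic>
#include <thread>

namespace demo {
inline void spin_wait_demo() {
    std::atomic<int> ready{0};
    std::thread producer([&ready] { ready.store(1, std::memory_order_release); });
    tbb::detail::d0::spin_wait_until_eq(ready, 1);   // machine_pause, doubling, then yield()
    producer.join();
}
} // namespace demo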
**/ -template<typename T> -inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); } - -template<typename T> -inline bool is_poisoned(const std::atomic<T*>& p) { return is_poisoned(p.load(std::memory_order_relaxed)); } -#else -template<typename T> -inline void poison_pointer(T* &) {/*do nothing*/} - -template<typename T> -inline void poison_pointer(std::atomic<T*>&) { /* do nothing */} -#endif /* !TBB_USE_ASSERT */ - -template <std::size_t alignment = 0, typename T> -bool assert_pointer_valid(T* p, const char* comment = nullptr) { - suppress_unused_warning(p, comment); - __TBB_ASSERT(p != nullptr, comment); - __TBB_ASSERT(!is_poisoned(p), comment); -#if !(_MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER) - __TBB_ASSERT(is_aligned(p, alignment == 0 ? alignof(T) : alignment), comment); -#endif - // Returns something to simplify assert_pointers_valid implementation. - return true; -} - -template <typename... Args> -void assert_pointers_valid(Args*... p) { - // suppress_unused_warning is used as an evaluation context for the variadic pack. - suppress_unused_warning(assert_pointer_valid(p)...); -} - -//! Base class for types that should not be assigned. -class no_assign { -public: - void operator=(const no_assign&) = delete; - no_assign(const no_assign&) = default; - no_assign() = default; -}; - -//! Base class for types that should not be copied or assigned. -class no_copy: no_assign { -public: - no_copy(const no_copy&) = delete; - no_copy() = default; -}; - -template <typename T> -void swap_atomics_relaxed(std::atomic<T>& lhs, std::atomic<T>& rhs){ - T tmp = lhs.load(std::memory_order_relaxed); - lhs.store(rhs.load(std::memory_order_relaxed), std::memory_order_relaxed); - rhs.store(tmp, std::memory_order_relaxed); -} - -//! One-time initialization states -enum class do_once_state { - uninitialized = 0, ///< No execution attempts have been undertaken yet - pending, ///< A thread is executing associated do-once routine - executed, ///< Do-once routine has been executed - initialized = executed ///< Convenience alias -}; - -//! One-time initialization function -/** /param initializer Pointer to function without arguments - The variant that returns bool is used for cases when initialization can fail - and it is OK to continue execution, but the state should be reset so that - the initialization attempt was repeated the next time. - /param state Shared state associated with initializer that specifies its - initialization state. Must be initially set to #uninitialized value - (e.g. by means of default static zero initialization). **/ -template <typename F> -void atomic_do_once( const F& initializer, std::atomic<do_once_state>& state ) { - // The loop in the implementation is necessary to avoid race when thread T2 - // that arrived in the middle of initialization attempt by another thread T1 - // has just made initialization possible. - // In such a case T2 has to rely on T1 to initialize, but T1 may already be past - // the point where it can recognize the changed conditions. 
- do_once_state expected_state; - while ( state.load( std::memory_order_acquire ) != do_once_state::executed ) { - if( state.load( std::memory_order_relaxed ) == do_once_state::uninitialized ) { - expected_state = do_once_state::uninitialized; -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 - using enum_type = typename std::underlying_type<do_once_state>::type; - if( ((std::atomic<enum_type>&)state).compare_exchange_strong( (enum_type&)expected_state, (enum_type)do_once_state::pending ) ) { -#else - if( state.compare_exchange_strong( expected_state, do_once_state::pending ) ) { -#endif - run_initializer( initializer, state ); - break; - } - } - spin_wait_while_eq( state, do_once_state::pending ); - } -} - -// Run the initializer which can not fail -template<typename Functor> -void run_initializer(const Functor& f, std::atomic<do_once_state>& state ) { - f(); - state.store(do_once_state::executed, std::memory_order_release); -} - -#if __TBB_CPP20_CONCEPTS_PRESENT -template <typename T> -concept boolean_testable_impl = std::convertible_to<T, bool>; - -template <typename T> -concept boolean_testable = boolean_testable_impl<T> && requires( T&& t ) { - { !std::forward<T>(t) } -> boolean_testable_impl; - }; - -#if __TBB_CPP20_COMPARISONS_PRESENT -struct synthesized_three_way_comparator { - template <typename T1, typename T2> - auto operator()( const T1& lhs, const T2& rhs ) const - requires requires { - { lhs < rhs } -> boolean_testable; - { rhs < lhs } -> boolean_testable; - } - { - if constexpr (std::three_way_comparable_with<T1, T2>) { - return lhs <=> rhs; - } else { - if (lhs < rhs) { - return std::weak_ordering::less; - } - if (rhs < lhs) { - return std::weak_ordering::greater; - } - return std::weak_ordering::equivalent; - } - } -}; // struct synthesized_three_way_comparator - -template <typename T1, typename T2 = T1> -using synthesized_three_way_result = decltype(synthesized_three_way_comparator{}(std::declval<T1&>(), - std::declval<T2&>())); - -#endif // __TBB_CPP20_COMPARISONS_PRESENT -#endif // __TBB_CPP20_CONCEPTS_PRESENT - -} // namespace d0 - -namespace d1 { - -class delegate_base { -public: - virtual bool operator()() const = 0; - virtual ~delegate_base() {} -}; // class delegate_base - -} // namespace d1 - -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__utils_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__utils_H +#define __TBB_detail__utils_H + +#include <type_traits> +#include <cstdint> +#include <atomic> + +#include "_config.h" +#include "_assert.h" +#include "_machine.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Utility template function to prevent "unused" warnings by various compilers. +template<typename... T> void suppress_unused_warning(T&&...) {} + +//! Compile-time constant that is upper bound on cache line/sector size. +/** It should be used only in situations where having a compile-time upper + bound is more useful than a run-time exact answer. 
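// Illustrative sketch only: atomic_do_once() lets exactly one thread run the initializer
// while others spin until the shared state becomes `executed`. init_tables and
// table_state are invented names.
#include "oneapi/tbb/detail/_utils.h"
#include <atomic>

namespace demo {
inline void init_tables() { /* one-time setup would go here */ }

// Zero-initialized, i.e. do_once_state::uninitialized.
std::atomic<tbb::detail::d0::do_once_state> table_state{};

inline void ensure_tables() {
    tbb::detail::d0::atomic_do_once([] { init_tables(); }, table_state);
}
} // namespace demo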
+ @ingroup memory_allocation */ +constexpr size_t max_nfs_size = 128; + +//! Class that implements exponential backoff. +class atomic_backoff { + //! Time delay, in units of "pause" instructions. + /** Should be equal to approximately the number of "pause" instructions + that take the same time as an context switch. Must be a power of two.*/ + static constexpr std::int32_t LOOPS_BEFORE_YIELD = 16; + std::int32_t count; + +public: + // In many cases, an object of this type is initialized eagerly on hot path, + // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ } + // For this reason, the construction cost must be very small! + atomic_backoff() : count(1) {} + // This constructor pauses immediately; do not use on hot paths! + atomic_backoff(bool) : count(1) { pause(); } + + //! No Copy + atomic_backoff(const atomic_backoff&) = delete; + atomic_backoff& operator=(const atomic_backoff&) = delete; + + //! Pause for a while. + void pause() { + if (count <= LOOPS_BEFORE_YIELD) { + machine_pause(count); + // Pause twice as long the next time. + count *= 2; + } else { + // Pause is so long that we might as well yield CPU to scheduler. + yield(); + } + } + + //! Pause for a few times and return false if saturated. + bool bounded_pause() { + machine_pause(count); + if (count < LOOPS_BEFORE_YIELD) { + // Pause twice as long the next time. + count *= 2; + return true; + } else { + return false; + } + } + + void reset() { + count = 1; + } +}; + +//! Spin WHILE the condition is true. +/** T and U should be comparable types. */ +template <typename T, typename C> +void spin_wait_while_condition(const std::atomic<T>& location, C comp) { + atomic_backoff backoff; + while (comp(location.load(std::memory_order_acquire))) { + backoff.pause(); + } +} + +//! Spin WHILE the value of the variable is equal to a given value +/** T and U should be comparable types. */ +template <typename T, typename U> +void spin_wait_while_eq(const std::atomic<T>& location, const U value) { + spin_wait_while_condition(location, [&value](T t) { return t == value; }); +} + +//! Spin UNTIL the value of the variable is equal to a given value +/** T and U should be comparable types. */ +template<typename T, typename U> +void spin_wait_until_eq(const std::atomic<T>& location, const U value) { + spin_wait_while_condition(location, [&value](T t) { return t != value; }); +} + +template <typename T> +std::uintptr_t log2(T in) { + __TBB_ASSERT(in > 0, "The logarithm of a non-positive value is undefined."); + return machine_log2(in); +} + +template<typename T> +T reverse_bits(T src) { + return machine_reverse_bits(src); +} + +template<typename T> +T reverse_n_bits(T src, std::size_t n) { + __TBB_ASSERT(n != 0, "Reverse for 0 bits is undefined behavior."); + return reverse_bits(src) >> (number_of_bits<T>() - n); +} + +// A function to check if passed integer is a power of two +template <typename IntegerType> +constexpr bool is_power_of_two( IntegerType arg ) { + static_assert(std::is_integral<IntegerType>::value, + "An argument for is_power_of_two should be integral type"); + return arg && (0 == (arg & (arg - 1))); +} + +// A function to determine if passed integer is a power of two +// at least as big as another power of two, i.e. 
for strictly positive i and j, +// with j being a power of two, determines whether i==j<<k for some nonnegative k +template <typename ArgIntegerType, typename DivisorIntegerType> +constexpr bool is_power_of_two_at_least(ArgIntegerType arg, DivisorIntegerType divisor) { + // Divisor should be a power of two + static_assert(std::is_integral<ArgIntegerType>::value, + "An argument for is_power_of_two_at_least should be integral type"); + return 0 == (arg & (arg - divisor)); +} + +// A function to compute arg modulo divisor where divisor is a power of 2. +template<typename ArgIntegerType, typename DivisorIntegerType> +inline ArgIntegerType modulo_power_of_two(ArgIntegerType arg, DivisorIntegerType divisor) { + __TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" ); + return arg & (divisor - 1); +} + +//! A function to check if passed in pointer is aligned on a specific border +template<typename T> +constexpr bool is_aligned(T* pointer, std::uintptr_t alignment) { + return 0 == ((std::uintptr_t)pointer & (alignment - 1)); +} + +#if TBB_USE_ASSERT +static void* const poisoned_ptr = reinterpret_cast<void*>(-1); + +//! Set p to invalid pointer value. +template<typename T> +inline void poison_pointer( T* &p ) { p = reinterpret_cast<T*>(poisoned_ptr); } + +template<typename T> +inline void poison_pointer(std::atomic<T*>& p) { p.store(reinterpret_cast<T*>(poisoned_ptr), std::memory_order_relaxed); } + +/** Expected to be used in assertions only, thus no empty form is defined. **/ +template<typename T> +inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); } + +template<typename T> +inline bool is_poisoned(const std::atomic<T*>& p) { return is_poisoned(p.load(std::memory_order_relaxed)); } +#else +template<typename T> +inline void poison_pointer(T* &) {/*do nothing*/} + +template<typename T> +inline void poison_pointer(std::atomic<T*>&) { /* do nothing */} +#endif /* !TBB_USE_ASSERT */ + +template <std::size_t alignment = 0, typename T> +bool assert_pointer_valid(T* p, const char* comment = nullptr) { + suppress_unused_warning(p, comment); + __TBB_ASSERT(p != nullptr, comment); + __TBB_ASSERT(!is_poisoned(p), comment); +#if !(_MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER) + __TBB_ASSERT(is_aligned(p, alignment == 0 ? alignof(T) : alignment), comment); +#endif + // Returns something to simplify assert_pointers_valid implementation. + return true; +} + +template <typename... Args> +void assert_pointers_valid(Args*... p) { + // suppress_unused_warning is used as an evaluation context for the variadic pack. + suppress_unused_warning(assert_pointer_valid(p)...); +} + +//! Base class for types that should not be assigned. +class no_assign { +public: + void operator=(const no_assign&) = delete; + no_assign(const no_assign&) = default; + no_assign() = default; +}; + +//! Base class for types that should not be copied or assigned. +class no_copy: no_assign { +public: + no_copy(const no_copy&) = delete; + no_copy() = default; +}; + +template <typename T> +void swap_atomics_relaxed(std::atomic<T>& lhs, std::atomic<T>& rhs){ + T tmp = lhs.load(std::memory_order_relaxed); + lhs.store(rhs.load(std::memory_order_relaxed), std::memory_order_relaxed); + rhs.store(tmp, std::memory_order_relaxed); +} + +//! 
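// Illustrative sketch only: a numeric check of the power-of-two helpers above.
#include "oneapi/tbb/detail/_utils.h"
#include <cassert>

namespace demo {
static_assert(tbb::detail::d0::is_power_of_two(64u), "64 == 2^6");
static_assert(!tbb::detail::d0::is_power_of_two(48u), "48 = 16*3 is not a power of two");

inline void modulo_demo() {
    // modulo_power_of_two(arg, d) computes arg & (d - 1):
    // 37 = 0b100101, 8 - 1 = 0b000111, so 37 mod 8 == 0b101 == 5.
    assert(tbb::detail::d0::modulo_power_of_two(37u, 8u) == 5u);
}
} // namespace demo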
One-time initialization states +enum class do_once_state { + uninitialized = 0, ///< No execution attempts have been undertaken yet + pending, ///< A thread is executing associated do-once routine + executed, ///< Do-once routine has been executed + initialized = executed ///< Convenience alias +}; + +//! One-time initialization function +/** /param initializer Pointer to function without arguments + The variant that returns bool is used for cases when initialization can fail + and it is OK to continue execution, but the state should be reset so that + the initialization attempt was repeated the next time. + /param state Shared state associated with initializer that specifies its + initialization state. Must be initially set to #uninitialized value + (e.g. by means of default static zero initialization). **/ +template <typename F> +void atomic_do_once( const F& initializer, std::atomic<do_once_state>& state ) { + // The loop in the implementation is necessary to avoid race when thread T2 + // that arrived in the middle of initialization attempt by another thread T1 + // has just made initialization possible. + // In such a case T2 has to rely on T1 to initialize, but T1 may already be past + // the point where it can recognize the changed conditions. + do_once_state expected_state; + while ( state.load( std::memory_order_acquire ) != do_once_state::executed ) { + if( state.load( std::memory_order_relaxed ) == do_once_state::uninitialized ) { + expected_state = do_once_state::uninitialized; +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + using enum_type = typename std::underlying_type<do_once_state>::type; + if( ((std::atomic<enum_type>&)state).compare_exchange_strong( (enum_type&)expected_state, (enum_type)do_once_state::pending ) ) { +#else + if( state.compare_exchange_strong( expected_state, do_once_state::pending ) ) { +#endif + run_initializer( initializer, state ); + break; + } + } + spin_wait_while_eq( state, do_once_state::pending ); + } +} + +// Run the initializer which can not fail +template<typename Functor> +void run_initializer(const Functor& f, std::atomic<do_once_state>& state ) { + f(); + state.store(do_once_state::executed, std::memory_order_release); +} + +#if __TBB_CPP20_CONCEPTS_PRESENT +template <typename T> +concept boolean_testable_impl = std::convertible_to<T, bool>; + +template <typename T> +concept boolean_testable = boolean_testable_impl<T> && requires( T&& t ) { + { !std::forward<T>(t) } -> boolean_testable_impl; + }; + +#if __TBB_CPP20_COMPARISONS_PRESENT +struct synthesized_three_way_comparator { + template <typename T1, typename T2> + auto operator()( const T1& lhs, const T2& rhs ) const + requires requires { + { lhs < rhs } -> boolean_testable; + { rhs < lhs } -> boolean_testable; + } + { + if constexpr (std::three_way_comparable_with<T1, T2>) { + return lhs <=> rhs; + } else { + if (lhs < rhs) { + return std::weak_ordering::less; + } + if (rhs < lhs) { + return std::weak_ordering::greater; + } + return std::weak_ordering::equivalent; + } + } +}; // struct synthesized_three_way_comparator + +template <typename T1, typename T2 = T1> +using synthesized_three_way_result = decltype(synthesized_three_way_comparator{}(std::declval<T1&>(), + std::declval<T2&>())); + +#endif // __TBB_CPP20_COMPARISONS_PRESENT +#endif // __TBB_CPP20_CONCEPTS_PRESENT + +} // namespace d0 + +namespace d1 { + +class delegate_base { +public: + virtual bool operator()() const = 0; + virtual ~delegate_base() {} +}; // class delegate_base + +} // namespace d1 + +} // namespace 
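// Illustrative sketch only (requires the C++20 comparison support guarded above): for a
// type that defines only operator<, synthesized_three_way_comparator falls back to two
// '<' comparisons and yields std::weak_ordering. legacy_key is an invented type.
#include "oneapi/tbb/detail/_utils.h"
#include <compare>

namespace demo {
struct legacy_key {
    int v;
    friend bool operator<(const legacy_key& a, const legacy_key& b) { return a.v < b.v; }
};

inline std::weak_ordering compare_keys(const legacy_key& a, const legacy_key& b) {
    return tbb::detail::d0::synthesized_three_way_comparator{}(a, b);
}
} // namespace demo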
detail +} // namespace tbb + +#endif // __TBB_detail__utils_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h b/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h index 246447a213..70c1f98336 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h +++ b/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h @@ -1,1113 +1,1113 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_enumerable_thread_specific_H -#define __TBB_enumerable_thread_specific_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_assert.h" -#include "detail/_template_helpers.h" -#include "detail/_aligned_space.h" - -#include "concurrent_vector.h" -#include "tbb_allocator.h" -#include "cache_aligned_allocator.h" -#include "profiling.h" - -#include <atomic> -#include <thread> -#include <cstring> // memcpy -#include <cstddef> // std::ptrdiff_t - -#include "task.h" // for task::suspend_point - -#if _WIN32 || _WIN64 -#include <windows.h> -#else -#include <pthread.h> -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -//! enum for selecting between single key and key-per-instance versions -enum ets_key_usage_type { - ets_key_per_instance - , ets_no_key -#if __TBB_RESUMABLE_TASKS - , ets_suspend_aware -#endif -}; - -// Forward declaration to use in internal classes -template <typename T, typename Allocator, ets_key_usage_type ETS_key_type> -class enumerable_thread_specific; - -template <std::size_t ThreadIDSize> -struct internal_ets_key_selector { - using key_type = std::thread::id; - static key_type current_key() { - return std::this_thread::get_id(); - } -}; - -// Intel Compiler on OSX cannot create atomics objects that instantiated from non-fundamental types -#if __INTEL_COMPILER && __APPLE__ -template<> -struct internal_ets_key_selector<sizeof(std::size_t)> { - using key_type = std::size_t; - static key_type current_key() { - auto id = std::this_thread::get_id(); - return reinterpret_cast<key_type&>(id); - } -}; -#endif - -template <ets_key_usage_type ETS_key_type> -struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {}; - -#if __TBB_RESUMABLE_TASKS -template <> -struct ets_key_selector<ets_suspend_aware> { - using key_type = suspend_point; - static key_type current_key() { - return r1::current_suspend_point(); - } -}; -#endif - -template<ets_key_usage_type ETS_key_type> -class ets_base : detail::no_copy { -protected: - using key_type = typename ets_key_selector<ETS_key_type>::key_type; - -public: - struct slot; - struct array { - array* next; - std::size_t lg_size; - slot& at( std::size_t k ) { - return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k]; - } - std::size_t size() const { return std::size_t(1) << lg_size; } - std::size_t mask() const { return size() - 1; } - std::size_t start( std::size_t h ) const { - return h >> (8 * sizeof(std::size_t) - lg_size); - } - }; - struct slot { - 
std::atomic<key_type> key; - void* ptr; - bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); } - bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; } - bool claim( key_type k ) { - // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size - key_type expected = key_type(); - return key.compare_exchange_strong(expected, k); - } - }; - -protected: - //! Root of linked list of arrays of decreasing size. - /** nullptr if and only if my_count==0. - Each array in the list is half the size of its predecessor. */ - std::atomic<array*> my_root; - std::atomic<std::size_t> my_count; - - virtual void* create_local() = 0; - virtual void* create_array(std::size_t _size) = 0; // _size in bytes - virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes - - array* allocate( std::size_t lg_size ) { - std::size_t n = std::size_t(1) << lg_size; - array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot))); - a->lg_size = lg_size; - std::memset( a + 1, 0, n * sizeof(slot) ); - return a; - } - void free(array* a) { - std::size_t n = std::size_t(1) << (a->lg_size); - free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) ); - } - - ets_base() : my_root{nullptr}, my_count{0} {} - virtual ~ets_base(); // g++ complains if this is not virtual - - void* table_lookup( bool& exists ); - void table_clear(); - // The following functions are not used in concurrent context, - // so we don't need synchronization and ITT annotations there. - template <ets_key_usage_type E2> - void table_elementwise_copy( const ets_base& other, - void*(*add_element)(ets_base<E2>&, void*) ) { - __TBB_ASSERT(!my_root.load(std::memory_order_relaxed),NULL); - __TBB_ASSERT(!my_count.load(std::memory_order_relaxed),NULL); - if( !other.my_root.load(std::memory_order_relaxed) ) return; - array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size); - my_root.store(root, std::memory_order_relaxed); - root->next = nullptr; - my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed); - std::size_t mask = root->mask(); - for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) { - for( std::size_t i = 0; i < r->size(); ++i ) { - slot& s1 = r->at(i); - if( !s1.empty() ) { - for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) { - slot& s2 = root->at(j); - if( s2.empty() ) { - s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr); - s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed); - break; - } - else if( s2.match(s1.key.load(std::memory_order_relaxed)) ) - break; - } - } - } - } - } - void table_swap( ets_base& other ) { - __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); - swap_atomics_relaxed(my_root, other.my_root); - swap_atomics_relaxed(my_count, other.my_count); - } -}; - -template<ets_key_usage_type ETS_key_type> -ets_base<ETS_key_type>::~ets_base() { - __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr); -} - -template<ets_key_usage_type ETS_key_type> -void ets_base<ETS_key_type>::table_clear() { - while ( array* r = my_root.load(std::memory_order_relaxed) ) { - my_root.store(r->next, std::memory_order_relaxed); - free(r); - } - my_count.store(0, std::memory_order_relaxed); -} - -template<ets_key_usage_type ETS_key_type> -void* ets_base<ETS_key_type>::table_lookup( bool& exists ) { - const key_type k 
= ets_key_selector<ETS_key_type>::current_key(); - - __TBB_ASSERT(k != key_type(),NULL); - void* found; - std::size_t h = std::hash<key_type>{}(k); - for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) { - call_itt_notify(acquired,r); - std::size_t mask=r->mask(); - for(std::size_t i = r->start(h); ;i=(i+1)&mask) { - slot& s = r->at(i); - if( s.empty() ) break; - if( s.match(k) ) { - if( r == my_root.load(std::memory_order_acquire) ) { - // Success at top level - exists = true; - return s.ptr; - } else { - // Success at some other level. Need to insert at top level. - exists = true; - found = s.ptr; - goto insert; - } - } - } - } - // Key does not yet exist. The density of slots in the table does not exceed 0.5, - // for if this will occur a new table is allocated with double the current table - // size, which is swapped in as the new root table. So an empty slot is guaranteed. - exists = false; - found = create_local(); - { - std::size_t c = ++my_count; - array* r = my_root.load(std::memory_order_acquire); - call_itt_notify(acquired,r); - if( !r || c > r->size()/2 ) { - std::size_t s = r ? r->lg_size : 2; - while( c > std::size_t(1)<<(s-1) ) ++s; - array* a = allocate(s); - for(;;) { - a->next = r; - call_itt_notify(releasing,a); - array* new_r = r; - if( my_root.compare_exchange_strong(new_r, a) ) break; - call_itt_notify(acquired, new_r); - __TBB_ASSERT(new_r != nullptr, nullptr); - if( new_r->lg_size >= s ) { - // Another thread inserted an equal or bigger array, so our array is superfluous. - free(a); - break; - } - r = new_r; - } - } - } - insert: - // Whether a slot has been found in an older table, or if it has been inserted at this level, - // it has already been accounted for in the total. Guaranteed to be room for it, and it is - // not present, so search for empty slot and use it. - array* ir = my_root.load(std::memory_order_acquire); - call_itt_notify(acquired, ir); - std::size_t mask = ir->mask(); - for(std::size_t i = ir->start(h);; i = (i+1)&mask) { - slot& s = ir->at(i); - if( s.empty() ) { - if( s.claim(k) ) { - s.ptr = found; - return found; - } - } - } -} - -//! 
Specialization that exploits native TLS -template <> -class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> { - using super = ets_base<ets_no_key>; -#if _WIN32||_WIN64 -#if __TBB_WIN8UI_SUPPORT - using tls_key_t = DWORD; - void create_key() { my_key = FlsAlloc(NULL); } - void destroy_key() { FlsFree(my_key); } - void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); } - void* get_tls() { return (void *)FlsGetValue(my_key); } -#else - using tls_key_t = DWORD; - void create_key() { my_key = TlsAlloc(); } - void destroy_key() { TlsFree(my_key); } - void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); } - void* get_tls() { return (void *)TlsGetValue(my_key); } -#endif -#else - using tls_key_t = pthread_key_t; - void create_key() { pthread_key_create(&my_key, NULL); } - void destroy_key() { pthread_key_delete(my_key); } - void set_tls( void * value ) const { pthread_setspecific(my_key, value); } - void* get_tls() const { return pthread_getspecific(my_key); } -#endif - tls_key_t my_key; - virtual void* create_local() override = 0; - virtual void* create_array(std::size_t _size) override = 0; // _size in bytes - virtual void free_array(void* ptr, std::size_t _size) override = 0; // size in bytes -protected: - ets_base() {create_key();} - ~ets_base() {destroy_key();} - void* table_lookup( bool& exists ) { - void* found = get_tls(); - if( found ) { - exists=true; - } else { - found = super::table_lookup(exists); - set_tls(found); - } - return found; - } - void table_clear() { - destroy_key(); - create_key(); - super::table_clear(); - } - void table_swap( ets_base& other ) { - using std::swap; - __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); - swap(my_key, other.my_key); - super::table_swap(other); - } -}; - -//! Random access iterator for traversing the thread local copies. -template< typename Container, typename Value > -class enumerable_thread_specific_iterator -{ - //! current position in the concurrent_vector - - Container *my_container; - typename Container::size_type my_index; - mutable Value *my_value; - - template<typename C, typename T, typename U> - friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i, - const enumerable_thread_specific_iterator<C, U>& j ); - - template<typename C, typename T, typename U> - friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i, - const enumerable_thread_specific_iterator<C,U>& j ); - - template<typename C, typename T, typename U> - friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i, - const enumerable_thread_specific_iterator<C,U>& j ); - - template<typename C, typename U> - friend class enumerable_thread_specific_iterator; - -public: - //! STL support - using difference_type = std::ptrdiff_t; - using value_type = Value; - using pointer = Value*; - using reference = Value&; - using iterator_category = std::random_access_iterator_tag; - - enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : - my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {} - - //! 
Default constructor - enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {} - - template<typename U> - enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) : - my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {} - - enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const { - return enumerable_thread_specific_iterator(*my_container, my_index + offset); - } - - friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) { - return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset); - } - - enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) { - my_index += offset; - my_value = nullptr; - return *this; - } - - enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const { - return enumerable_thread_specific_iterator( *my_container, my_index-offset ); - } - - enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) { - my_index -= offset; - my_value = nullptr; - return *this; - } - - Value& operator*() const { - Value* value = my_value; - if( !value ) { - value = my_value = (*my_container)[my_index].value(); - } - __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" ); - return *value; - } - - Value& operator[]( std::ptrdiff_t k ) const { - return *(*my_container)[my_index + k].value(); - } - - Value* operator->() const {return &operator*();} - - enumerable_thread_specific_iterator& operator++() { - ++my_index; - my_value = nullptr; - return *this; - } - - enumerable_thread_specific_iterator& operator--() { - --my_index; - my_value = nullptr; - return *this; - } - - //! Post increment - enumerable_thread_specific_iterator operator++(int) { - enumerable_thread_specific_iterator result = *this; - ++my_index; - my_value = nullptr; - return result; - } - - //! 
Post decrement - enumerable_thread_specific_iterator operator--(int) { - enumerable_thread_specific_iterator result = *this; - --my_index; - my_value = nullptr; - return result; - } -}; - -template<typename Container, typename T, typename U> -bool operator==( const enumerable_thread_specific_iterator<Container, T>& i, - const enumerable_thread_specific_iterator<Container, U>& j ) { - return i.my_index == j.my_index && i.my_container == j.my_container; -} - -template<typename Container, typename T, typename U> -bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return !(i==j); -} - -template<typename Container, typename T, typename U> -bool operator<( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return i.my_index<j.my_index; -} - -template<typename Container, typename T, typename U> -bool operator>( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return j<i; -} - -template<typename Container, typename T, typename U> -bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return !(i<j); -} - -template<typename Container, typename T, typename U> -bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return !(j<i); -} - -template<typename Container, typename T, typename U> -std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return i.my_index-j.my_index; -} - -template<typename SegmentedContainer, typename Value > -class segmented_iterator -{ - template<typename C, typename T, typename U> - friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); - - template<typename C, typename T, typename U> - friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); - - template<typename C, typename U> - friend class segmented_iterator; - -public: - segmented_iterator() {my_segcont = nullptr;} - - segmented_iterator( const SegmentedContainer& _segmented_container ) : - my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)), - outer_iter(my_segcont->end()) { } - - ~segmented_iterator() {} - - using InnerContainer = typename SegmentedContainer::value_type; - using inner_iterator = typename InnerContainer::iterator; - using outer_iterator = typename SegmentedContainer::iterator; - - // STL support - // TODO: inherit all types from segmented container? - using difference_type = std::ptrdiff_t; - using value_type = Value; - using size_type = typename SegmentedContainer::size_type; - using pointer = Value*; - using reference = Value&; - using iterator_category = std::input_iterator_tag; - - // Copy Constructor - template<typename U> - segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) : - my_segcont(other.my_segcont), - outer_iter(other.outer_iter), - // can we assign a default-constructed iterator to inner if we're at the end? 
- inner_iter(other.inner_iter) - {} - - // assignment - template<typename U> - segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) { - my_segcont = other.my_segcont; - outer_iter = other.outer_iter; - if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter; - return *this; - } - - // allow assignment of outer iterator to segmented iterator. Once it is - // assigned, move forward until a non-empty inner container is found or - // the end of the outer container is reached. - segmented_iterator& operator=(const outer_iterator& new_outer_iter) { - __TBB_ASSERT(my_segcont != nullptr, NULL); - // check that this iterator points to something inside the segmented container - for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) { - if( !outer_iter->empty() ) { - inner_iter = outer_iter->begin(); - break; - } - } - return *this; - } - - // pre-increment - segmented_iterator& operator++() { - advance_me(); - return *this; - } - - // post-increment - segmented_iterator operator++(int) { - segmented_iterator tmp = *this; - operator++(); - return tmp; - } - - bool operator==(const outer_iterator& other_outer) const { - __TBB_ASSERT(my_segcont != nullptr, NULL); - return (outer_iter == other_outer && - (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin())); - } - - bool operator!=(const outer_iterator& other_outer) const { - return !operator==(other_outer); - - } - - // (i)* RHS - reference operator*() const { - __TBB_ASSERT(my_segcont != nullptr, NULL); - __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container"); - __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // should never happen - return *inner_iter; - } - - // i-> - pointer operator->() const { return &operator*();} - -private: - SegmentedContainer* my_segcont; - outer_iterator outer_iter; - inner_iterator inner_iter; - - void advance_me() { - __TBB_ASSERT(my_segcont != nullptr, NULL); - __TBB_ASSERT(outer_iter != my_segcont->end(), NULL); // not true if there are no inner containers - __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // not true if the inner containers are all empty. - ++inner_iter; - while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) { - inner_iter = outer_iter->begin(); - } - } -}; // segmented_iterator - -template<typename SegmentedContainer, typename T, typename U> -bool operator==( const segmented_iterator<SegmentedContainer,T>& i, - const segmented_iterator<SegmentedContainer,U>& j ) { - if(i.my_segcont != j.my_segcont) return false; - if(i.my_segcont == nullptr) return true; - if(i.outer_iter != j.outer_iter) return false; - if(i.outer_iter == i.my_segcont->end()) return true; - return i.inner_iter == j.inner_iter; -} - -// != -template<typename SegmentedContainer, typename T, typename U> -bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, - const segmented_iterator<SegmentedContainer,U>& j ) { - return !(i==j); -} - -template<typename T> -struct construct_by_default: no_assign { - void construct(void*where) {new(where) T();} // C++ note: the () in T() ensure zero initialization. 
- construct_by_default( int ) {} -}; - -template<typename T> -struct construct_by_exemplar: no_assign { - const T exemplar; - void construct(void*where) {new(where) T(exemplar);} - construct_by_exemplar( const T& t ) : exemplar(t) {} - construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {} -}; - -template<typename T, typename Finit> -struct construct_by_finit: no_assign { - Finit f; - void construct(void* where) {new(where) T(f());} - construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {} -}; - -template<typename T, typename... P> -struct construct_by_args: no_assign { - stored_pack<P...> pack; - void construct(void* where) { - call( [where](const typename std::decay<P>::type&... args ){ - new(where) T(args...); - }, pack ); - } - construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {} -}; - -// storage for initialization function pointer -// TODO: consider removing the template parameter T here and in callback_leaf -class callback_base { -public: - // Clone *this - virtual callback_base* clone() const = 0; - // Destruct and free *this - virtual void destroy() = 0; - // Need virtual destructor to satisfy GCC compiler warning - virtual ~callback_base() { } - // Construct T at where - virtual void construct(void* where) = 0; -}; - -template <typename Constructor> -class callback_leaf: public callback_base, Constructor { - template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {} - // TODO: make the construction/destruction consistent (use allocator.construct/destroy) - using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>; - - callback_base* clone() const override { - return make(*this); - } - - void destroy() override { - my_allocator_type alloc; - tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this); - tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1); - } - - void construct(void* where) override { - Constructor::construct(where); - } - -public: - template<typename... P> - static callback_base* make( P&& ... params ) { - void* where = my_allocator_type().allocate(1); - return new(where) callback_leaf( std::forward<P>(params)... ); - } -}; - -//! Template for recording construction of objects in table -/** All maintenance of the space will be done explicitly on push_back, - and all thread local copies must be destroyed before the concurrent - vector is deleted. - - The flag is_built is initialized to false. When the local is - successfully-constructed, set the flag to true or call value_committed(). - If the constructor throws, the flag will be false. -*/ -template<typename U> -struct ets_element { - detail::aligned_space<U> my_space; - bool is_built; - ets_element() { is_built = false; } // not currently-built - U* value() { return my_space.begin(); } - U* value_committed() { is_built = true; return my_space.begin(); } - ~ets_element() { - if(is_built) { - my_space.begin()->~U(); - is_built = false; - } - } -}; - -// A predicate that can be used for a compile-time compatibility check of ETS instances -// Ideally, it should have been declared inside the ETS class, but unfortunately -// in that case VS2013 does not enable the variadic constructor. 
-template<typename T, typename ETS> struct is_compatible_ets : std::false_type {}; -template<typename T, typename U, typename A, ets_key_usage_type C> -struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {}; - -// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression -template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()()); -template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>; - -//! The enumerable_thread_specific container -/** enumerable_thread_specific has the following properties: - - thread-local copies are lazily created, with default, exemplar or function initialization. - - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant. - - the contained objects need not have operator=() defined if combine is not used. - - enumerable_thread_specific containers may be copy-constructed or assigned. - - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed. - - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods - -@par Segmented iterator - When the thread-local objects are containers with input_iterators defined, a segmented iterator may - be used to iterate over all the elements of all thread-local copies. - -@par combine and combine_each - - Both methods are defined for enumerable_thread_specific. - - combine() requires the type T have operator=() defined. - - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.) - - Both are evaluated in serial context (the methods are assumed to be non-benign.) - -@ingroup containers */ -template <typename T, typename Allocator=cache_aligned_allocator<T>, - ets_key_usage_type ETS_key_type=ets_no_key > -class enumerable_thread_specific: ets_base<ETS_key_type> { - - template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific; - - using padded_element = padded<ets_element<T>>; - - //! 
A generic range, used to create range objects from the iterators - template<typename I> - class generic_range_type: public blocked_range<I> { - public: - using value_type = T; - using reference = T&; - using const_reference = const T&; - using iterator = I; - using difference_type = std::ptrdiff_t; - - generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} - template<typename U> - generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} - generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {} - }; - - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; - - using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>; - using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >; - - callback_base *my_construct_callback; - - internal_collection_type my_locals; - - // TODO: consider unifying the callback mechanism for all create_local* methods below - // (likely non-compatible and requires interface version increase) - void* create_local() override { - padded_element& lref = *my_locals.grow_by(1); - my_construct_callback->construct(lref.value()); - return lref.value_committed(); - } - - static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) { - enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); - padded_element& lref = *ets.my_locals.grow_by(1); - new(lref.value()) T(*static_cast<T*>(p)); - return lref.value_committed(); - } - - static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) { - enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); - padded_element& lref = *ets.my_locals.grow_by(1); - new(lref.value()) T(std::move(*static_cast<T*>(p))); - return lref.value_committed(); - } - - using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>; - - // _size is in bytes - void* create_array(std::size_t _size) override { - std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); - return array_allocator_type().allocate(nelements); - } - - void free_array( void* _ptr, std::size_t _size) override { - std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); - array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements); - } - -public: - - //! Basic types - using value_type = T; - using allocator_type = Allocator; - using size_type = typename internal_collection_type::size_type; - using difference_type = typename internal_collection_type::difference_type; - using reference = value_type&; - using const_reference = const value_type&; - - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - // Iterator types - using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>; - using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>; - - // Parallel range types - using range_type = generic_range_type<iterator>; - using const_range_type = generic_range_type<const_iterator>; - - //! Default constructor. Each local instance of T is default constructed. - enumerable_thread_specific() : my_construct_callback( - callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0) - ){} - - //! Constructor with initializer functor. 
Each local instance of T is constructed by T(finit()). - template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type> - explicit enumerable_thread_specific( Finit finit ) : my_construct_callback( - callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) ) - ){} - - //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar. - explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback( - callback_leaf<construct_by_exemplar<T> >::make( exemplar ) - ){} - - explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback( - callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) ) - ){} - - //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...) - template <typename P1, typename... P, - typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value - && !is_compatible_ets<T, typename std::decay<P1>::type>::value - && !std::is_same<T, typename std::decay<P1>::type>::value - >::type> - enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback( - callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... ) - ){} - - //! Destructor - ~enumerable_thread_specific() { - if(my_construct_callback) my_construct_callback->destroy(); - // Deallocate the hash table before overridden free_array() becomes inaccessible - this->ets_base<ETS_key_type>::table_clear(); - } - - //! returns reference to local, discarding exists - reference local() { - bool exists; - return local(exists); - } - - //! Returns reference to calling thread's local copy, creating one if necessary - reference local(bool& exists) { - void* ptr = this->table_lookup(exists); - return *(T*)ptr; - } - - //! Get the number of local copies - size_type size() const { return my_locals.size(); } - - //! true if there have been no local copies created - bool empty() const { return my_locals.empty(); } - - //! begin iterator - iterator begin() { return iterator( my_locals, 0 ); } - //! end iterator - iterator end() { return iterator(my_locals, my_locals.size() ); } - - //! begin const iterator - const_iterator begin() const { return const_iterator(my_locals, 0); } - - //! end const iterator - const_iterator end() const { return const_iterator(my_locals, my_locals.size()); } - - //! Get range for parallel algorithms - range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } - - //! Get const range for parallel algorithms - const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); } - - //! Destroys local copies - void clear() { - my_locals.clear(); - this->table_clear(); - // callback is not destroyed - } - -private: - template<typename A2, ets_key_usage_type C2> - void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) { - // this tests is_compatible_ets - static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); - // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception. 
- my_construct_callback = other.my_construct_callback->clone(); - __TBB_ASSERT(my_locals.size()==0,NULL); - my_locals.reserve(other.size()); - this->table_elementwise_copy( other, create_local_by_copy ); - } - - void internal_swap(enumerable_thread_specific& other) { - using std::swap; - __TBB_ASSERT( this!=&other, NULL ); - swap(my_construct_callback, other.my_construct_callback); - // concurrent_vector::swap() preserves storage space, - // so addresses to the vector kept in ETS hash table remain valid. - swap(my_locals, other.my_locals); - this->ets_base<ETS_key_type>::table_swap(other); - } - - template<typename A2, ets_key_usage_type C2> - void internal_move(enumerable_thread_specific<T, A2, C2>&& other) { - static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); - my_construct_callback = other.my_construct_callback; - other.my_construct_callback = nullptr; - __TBB_ASSERT(my_locals.size()==0,NULL); - my_locals.reserve(other.size()); - this->table_elementwise_copy( other, create_local_by_move ); - } - -public: - enumerable_thread_specific( const enumerable_thread_specific& other ) - : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */ - { - internal_copy(other); - } - - template<typename Alloc, ets_key_usage_type Cachetype> - enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) - { - internal_copy(other); - } - - enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() - { - // TODO: use internal_move correctly here - internal_swap(other); - } - - template<typename Alloc, ets_key_usage_type Cachetype> - enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback() - { - internal_move(std::move(other)); - } - - enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) - { - if( this != &other ) { - this->clear(); - my_construct_callback->destroy(); - internal_copy( other ); - } - return *this; - } - - template<typename Alloc, ets_key_usage_type Cachetype> - enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) - { - __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types - this->clear(); - my_construct_callback->destroy(); - internal_copy(other); - return *this; - } - - enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) - { - if( this != &other ) { - // TODO: use internal_move correctly here - internal_swap(other); - } - return *this; - } - - template<typename Alloc, ets_key_usage_type Cachetype> - enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) - { - __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types - this->clear(); - my_construct_callback->destroy(); - internal_move(std::move(other)); - return *this; - } - - // CombineFunc has signature T(T,T) or T(const T&, const T&) - template <typename CombineFunc> - T combine(CombineFunc f_combine) { - if(begin() == end()) { - ets_element<T> location; - my_construct_callback->construct(location.value()); - return *location.value_committed(); - } - const_iterator ci = begin(); - T my_result = *ci; - while(++ci != end()) - my_result = f_combine( my_result, *ci ); - return my_result; - } - - // combine_func_t takes T by value or by [const] reference, and returns nothing - template <typename 
CombineFunc> - void combine_each(CombineFunc f_combine) { - for(iterator ci = begin(); ci != end(); ++ci) { - f_combine( *ci ); - } - } - -}; // enumerable_thread_specific - -template< typename Container > -class flattened2d { - // This intermediate typedef is to address issues with VC7.1 compilers - using conval_type = typename Container::value_type; - -public: - //! Basic types - using size_type = typename conval_type::size_type; - using difference_type = typename conval_type::difference_type; - using allocator_type = typename conval_type::allocator_type; - using value_type = typename conval_type::value_type; - using reference = typename conval_type::reference; - using const_reference = typename conval_type::const_reference; - using pointer = typename conval_type::pointer; - using const_pointer = typename conval_type::const_pointer; - - using iterator = segmented_iterator<Container, value_type>; - using const_iterator = segmented_iterator<Container, const value_type>; - - flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) : - my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { } - - explicit flattened2d( const Container &c ) : - my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { } - - iterator begin() { return iterator(*my_container) = my_begin; } - iterator end() { return iterator(*my_container) = my_end; } - const_iterator begin() const { return const_iterator(*my_container) = my_begin; } - const_iterator end() const { return const_iterator(*my_container) = my_end; } - - size_type size() const { - size_type tot_size = 0; - for(typename Container::const_iterator i = my_begin; i != my_end; ++i) { - tot_size += i->size(); - } - return tot_size; - } - -private: - Container *my_container; - typename Container::const_iterator my_begin; - typename Container::const_iterator my_end; -}; - -template <typename Container> -flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) { - return flattened2d<Container>(c, b, e); -} - -template <typename Container> -flattened2d<Container> flatten2d(const Container &c) { - return flattened2d<Container>(c); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::enumerable_thread_specific; -using detail::d1::flattened2d; -using detail::d1::flatten2d; -// ets enum keys -using detail::d1::ets_key_usage_type; -using detail::d1::ets_key_per_instance; -using detail::d1::ets_no_key; -#if __TBB_RESUMABLE_TASKS -using detail::d1::ets_suspend_aware; -#endif -} // inline namespace v1 - -} // namespace tbb - -#endif // __TBB_enumerable_thread_specific_H - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_enumerable_thread_specific_H +#define __TBB_enumerable_thread_specific_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_template_helpers.h" +#include "detail/_aligned_space.h" + +#include "concurrent_vector.h" +#include "tbb_allocator.h" +#include "cache_aligned_allocator.h" +#include "profiling.h" + +#include <atomic> +#include <thread> +#include <cstring> // memcpy +#include <cstddef> // std::ptrdiff_t + +#include "task.h" // for task::suspend_point + +#if _WIN32 || _WIN64 +#include <windows.h> +#else +#include <pthread.h> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +//! enum for selecting between single key and key-per-instance versions +enum ets_key_usage_type { + ets_key_per_instance + , ets_no_key +#if __TBB_RESUMABLE_TASKS + , ets_suspend_aware +#endif +}; + +// Forward declaration to use in internal classes +template <typename T, typename Allocator, ets_key_usage_type ETS_key_type> +class enumerable_thread_specific; + +template <std::size_t ThreadIDSize> +struct internal_ets_key_selector { + using key_type = std::thread::id; + static key_type current_key() { + return std::this_thread::get_id(); + } +}; + +// Intel Compiler on OSX cannot create atomics objects that instantiated from non-fundamental types +#if __INTEL_COMPILER && __APPLE__ +template<> +struct internal_ets_key_selector<sizeof(std::size_t)> { + using key_type = std::size_t; + static key_type current_key() { + auto id = std::this_thread::get_id(); + return reinterpret_cast<key_type&>(id); + } +}; +#endif + +template <ets_key_usage_type ETS_key_type> +struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {}; + +#if __TBB_RESUMABLE_TASKS +template <> +struct ets_key_selector<ets_suspend_aware> { + using key_type = suspend_point; + static key_type current_key() { + return r1::current_suspend_point(); + } +}; +#endif + +template<ets_key_usage_type ETS_key_type> +class ets_base : detail::no_copy { +protected: + using key_type = typename ets_key_selector<ETS_key_type>::key_type; + +public: + struct slot; + struct array { + array* next; + std::size_t lg_size; + slot& at( std::size_t k ) { + return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k]; + } + std::size_t size() const { return std::size_t(1) << lg_size; } + std::size_t mask() const { return size() - 1; } + std::size_t start( std::size_t h ) const { + return h >> (8 * sizeof(std::size_t) - lg_size); + } + }; + struct slot { + std::atomic<key_type> key; + void* ptr; + bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); } + bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; } + bool claim( key_type k ) { + // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size + key_type expected = key_type(); + return key.compare_exchange_strong(expected, k); + } + }; + +protected: + //! Root of linked list of arrays of decreasing size. + /** nullptr if and only if my_count==0. + Each array in the list is half the size of its predecessor. 
*/ + std::atomic<array*> my_root; + std::atomic<std::size_t> my_count; + + virtual void* create_local() = 0; + virtual void* create_array(std::size_t _size) = 0; // _size in bytes + virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes + + array* allocate( std::size_t lg_size ) { + std::size_t n = std::size_t(1) << lg_size; + array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot))); + a->lg_size = lg_size; + std::memset( a + 1, 0, n * sizeof(slot) ); + return a; + } + void free(array* a) { + std::size_t n = std::size_t(1) << (a->lg_size); + free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) ); + } + + ets_base() : my_root{nullptr}, my_count{0} {} + virtual ~ets_base(); // g++ complains if this is not virtual + + void* table_lookup( bool& exists ); + void table_clear(); + // The following functions are not used in concurrent context, + // so we don't need synchronization and ITT annotations there. + template <ets_key_usage_type E2> + void table_elementwise_copy( const ets_base& other, + void*(*add_element)(ets_base<E2>&, void*) ) { + __TBB_ASSERT(!my_root.load(std::memory_order_relaxed),NULL); + __TBB_ASSERT(!my_count.load(std::memory_order_relaxed),NULL); + if( !other.my_root.load(std::memory_order_relaxed) ) return; + array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size); + my_root.store(root, std::memory_order_relaxed); + root->next = nullptr; + my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + std::size_t mask = root->mask(); + for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) { + for( std::size_t i = 0; i < r->size(); ++i ) { + slot& s1 = r->at(i); + if( !s1.empty() ) { + for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) { + slot& s2 = root->at(j); + if( s2.empty() ) { + s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr); + s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + break; + } + else if( s2.match(s1.key.load(std::memory_order_relaxed)) ) + break; + } + } + } + } + } + void table_swap( ets_base& other ) { + __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); + swap_atomics_relaxed(my_root, other.my_root); + swap_atomics_relaxed(my_count, other.my_count); + } +}; + +template<ets_key_usage_type ETS_key_type> +ets_base<ETS_key_type>::~ets_base() { + __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr); +} + +template<ets_key_usage_type ETS_key_type> +void ets_base<ETS_key_type>::table_clear() { + while ( array* r = my_root.load(std::memory_order_relaxed) ) { + my_root.store(r->next, std::memory_order_relaxed); + free(r); + } + my_count.store(0, std::memory_order_relaxed); +} + +template<ets_key_usage_type ETS_key_type> +void* ets_base<ETS_key_type>::table_lookup( bool& exists ) { + const key_type k = ets_key_selector<ETS_key_type>::current_key(); + + __TBB_ASSERT(k != key_type(),NULL); + void* found; + std::size_t h = std::hash<key_type>{}(k); + for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) { + call_itt_notify(acquired,r); + std::size_t mask=r->mask(); + for(std::size_t i = r->start(h); ;i=(i+1)&mask) { + slot& s = r->at(i); + if( s.empty() ) break; + if( s.match(k) ) { + if( r == my_root.load(std::memory_order_acquire) ) { + // Success at top level + exists = true; + return s.ptr; + } else { + // Success at some other level. 
Need to insert at top level. + exists = true; + found = s.ptr; + goto insert; + } + } + } + } + // Key does not yet exist. The density of slots in the table does not exceed 0.5, + // for if this will occur a new table is allocated with double the current table + // size, which is swapped in as the new root table. So an empty slot is guaranteed. + exists = false; + found = create_local(); + { + std::size_t c = ++my_count; + array* r = my_root.load(std::memory_order_acquire); + call_itt_notify(acquired,r); + if( !r || c > r->size()/2 ) { + std::size_t s = r ? r->lg_size : 2; + while( c > std::size_t(1)<<(s-1) ) ++s; + array* a = allocate(s); + for(;;) { + a->next = r; + call_itt_notify(releasing,a); + array* new_r = r; + if( my_root.compare_exchange_strong(new_r, a) ) break; + call_itt_notify(acquired, new_r); + __TBB_ASSERT(new_r != nullptr, nullptr); + if( new_r->lg_size >= s ) { + // Another thread inserted an equal or bigger array, so our array is superfluous. + free(a); + break; + } + r = new_r; + } + } + } + insert: + // Whether a slot has been found in an older table, or if it has been inserted at this level, + // it has already been accounted for in the total. Guaranteed to be room for it, and it is + // not present, so search for empty slot and use it. + array* ir = my_root.load(std::memory_order_acquire); + call_itt_notify(acquired, ir); + std::size_t mask = ir->mask(); + for(std::size_t i = ir->start(h);; i = (i+1)&mask) { + slot& s = ir->at(i); + if( s.empty() ) { + if( s.claim(k) ) { + s.ptr = found; + return found; + } + } + } +} + +//! Specialization that exploits native TLS +template <> +class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> { + using super = ets_base<ets_no_key>; +#if _WIN32||_WIN64 +#if __TBB_WIN8UI_SUPPORT + using tls_key_t = DWORD; + void create_key() { my_key = FlsAlloc(NULL); } + void destroy_key() { FlsFree(my_key); } + void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); } + void* get_tls() { return (void *)FlsGetValue(my_key); } +#else + using tls_key_t = DWORD; + void create_key() { my_key = TlsAlloc(); } + void destroy_key() { TlsFree(my_key); } + void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); } + void* get_tls() { return (void *)TlsGetValue(my_key); } +#endif +#else + using tls_key_t = pthread_key_t; + void create_key() { pthread_key_create(&my_key, NULL); } + void destroy_key() { pthread_key_delete(my_key); } + void set_tls( void * value ) const { pthread_setspecific(my_key, value); } + void* get_tls() const { return pthread_getspecific(my_key); } +#endif + tls_key_t my_key; + virtual void* create_local() override = 0; + virtual void* create_array(std::size_t _size) override = 0; // _size in bytes + virtual void free_array(void* ptr, std::size_t _size) override = 0; // size in bytes +protected: + ets_base() {create_key();} + ~ets_base() {destroy_key();} + void* table_lookup( bool& exists ) { + void* found = get_tls(); + if( found ) { + exists=true; + } else { + found = super::table_lookup(exists); + set_tls(found); + } + return found; + } + void table_clear() { + destroy_key(); + create_key(); + super::table_clear(); + } + void table_swap( ets_base& other ) { + using std::swap; + __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); + swap(my_key, other.my_key); + super::table_swap(other); + } +}; + +//! Random access iterator for traversing the thread local copies. +template< typename Container, typename Value > +class enumerable_thread_specific_iterator +{ + //! 
current position in the concurrent_vector + + Container *my_container; + typename Container::size_type my_index; + mutable Value *my_value; + + template<typename C, typename T, typename U> + friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i, + const enumerable_thread_specific_iterator<C, U>& j ); + + template<typename C, typename T, typename U> + friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i, + const enumerable_thread_specific_iterator<C,U>& j ); + + template<typename C, typename T, typename U> + friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i, + const enumerable_thread_specific_iterator<C,U>& j ); + + template<typename C, typename U> + friend class enumerable_thread_specific_iterator; + +public: + //! STL support + using difference_type = std::ptrdiff_t; + using value_type = Value; + using pointer = Value*; + using reference = Value&; + using iterator_category = std::random_access_iterator_tag; + + enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : + my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {} + + //! Default constructor + enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {} + + template<typename U> + enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) : + my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {} + + enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const { + return enumerable_thread_specific_iterator(*my_container, my_index + offset); + } + + friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) { + return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset); + } + + enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) { + my_index += offset; + my_value = nullptr; + return *this; + } + + enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const { + return enumerable_thread_specific_iterator( *my_container, my_index-offset ); + } + + enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) { + my_index -= offset; + my_value = nullptr; + return *this; + } + + Value& operator*() const { + Value* value = my_value; + if( !value ) { + value = my_value = (*my_container)[my_index].value(); + } + __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" ); + return *value; + } + + Value& operator[]( std::ptrdiff_t k ) const { + return *(*my_container)[my_index + k].value(); + } + + Value* operator->() const {return &operator*();} + + enumerable_thread_specific_iterator& operator++() { + ++my_index; + my_value = nullptr; + return *this; + } + + enumerable_thread_specific_iterator& operator--() { + --my_index; + my_value = nullptr; + return *this; + } + + //! Post increment + enumerable_thread_specific_iterator operator++(int) { + enumerable_thread_specific_iterator result = *this; + ++my_index; + my_value = nullptr; + return result; + } + + //! 
Post decrement + enumerable_thread_specific_iterator operator--(int) { + enumerable_thread_specific_iterator result = *this; + --my_index; + my_value = nullptr; + return result; + } +}; + +template<typename Container, typename T, typename U> +bool operator==( const enumerable_thread_specific_iterator<Container, T>& i, + const enumerable_thread_specific_iterator<Container, U>& j ) { + return i.my_index == j.my_index && i.my_container == j.my_container; +} + +template<typename Container, typename T, typename U> +bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(i==j); +} + +template<typename Container, typename T, typename U> +bool operator<( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return i.my_index<j.my_index; +} + +template<typename Container, typename T, typename U> +bool operator>( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return j<i; +} + +template<typename Container, typename T, typename U> +bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(i<j); +} + +template<typename Container, typename T, typename U> +bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(j<i); +} + +template<typename Container, typename T, typename U> +std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return i.my_index-j.my_index; +} + +template<typename SegmentedContainer, typename Value > +class segmented_iterator +{ + template<typename C, typename T, typename U> + friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); + + template<typename C, typename T, typename U> + friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); + + template<typename C, typename U> + friend class segmented_iterator; + +public: + segmented_iterator() {my_segcont = nullptr;} + + segmented_iterator( const SegmentedContainer& _segmented_container ) : + my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)), + outer_iter(my_segcont->end()) { } + + ~segmented_iterator() {} + + using InnerContainer = typename SegmentedContainer::value_type; + using inner_iterator = typename InnerContainer::iterator; + using outer_iterator = typename SegmentedContainer::iterator; + + // STL support + // TODO: inherit all types from segmented container? + using difference_type = std::ptrdiff_t; + using value_type = Value; + using size_type = typename SegmentedContainer::size_type; + using pointer = Value*; + using reference = Value&; + using iterator_category = std::input_iterator_tag; + + // Copy Constructor + template<typename U> + segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) : + my_segcont(other.my_segcont), + outer_iter(other.outer_iter), + // can we assign a default-constructed iterator to inner if we're at the end? 
+ inner_iter(other.inner_iter) + {} + + // assignment + template<typename U> + segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) { + my_segcont = other.my_segcont; + outer_iter = other.outer_iter; + if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter; + return *this; + } + + // allow assignment of outer iterator to segmented iterator. Once it is + // assigned, move forward until a non-empty inner container is found or + // the end of the outer container is reached. + segmented_iterator& operator=(const outer_iterator& new_outer_iter) { + __TBB_ASSERT(my_segcont != nullptr, NULL); + // check that this iterator points to something inside the segmented container + for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) { + if( !outer_iter->empty() ) { + inner_iter = outer_iter->begin(); + break; + } + } + return *this; + } + + // pre-increment + segmented_iterator& operator++() { + advance_me(); + return *this; + } + + // post-increment + segmented_iterator operator++(int) { + segmented_iterator tmp = *this; + operator++(); + return tmp; + } + + bool operator==(const outer_iterator& other_outer) const { + __TBB_ASSERT(my_segcont != nullptr, NULL); + return (outer_iter == other_outer && + (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin())); + } + + bool operator!=(const outer_iterator& other_outer) const { + return !operator==(other_outer); + + } + + // (i)* RHS + reference operator*() const { + __TBB_ASSERT(my_segcont != nullptr, NULL); + __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container"); + __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // should never happen + return *inner_iter; + } + + // i-> + pointer operator->() const { return &operator*();} + +private: + SegmentedContainer* my_segcont; + outer_iterator outer_iter; + inner_iterator inner_iter; + + void advance_me() { + __TBB_ASSERT(my_segcont != nullptr, NULL); + __TBB_ASSERT(outer_iter != my_segcont->end(), NULL); // not true if there are no inner containers + __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // not true if the inner containers are all empty. + ++inner_iter; + while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) { + inner_iter = outer_iter->begin(); + } + } +}; // segmented_iterator + +template<typename SegmentedContainer, typename T, typename U> +bool operator==( const segmented_iterator<SegmentedContainer,T>& i, + const segmented_iterator<SegmentedContainer,U>& j ) { + if(i.my_segcont != j.my_segcont) return false; + if(i.my_segcont == nullptr) return true; + if(i.outer_iter != j.outer_iter) return false; + if(i.outer_iter == i.my_segcont->end()) return true; + return i.inner_iter == j.inner_iter; +} + +// != +template<typename SegmentedContainer, typename T, typename U> +bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, + const segmented_iterator<SegmentedContainer,U>& j ) { + return !(i==j); +} + +template<typename T> +struct construct_by_default: no_assign { + void construct(void*where) {new(where) T();} // C++ note: the () in T() ensure zero initialization. 
+ construct_by_default( int ) {} +}; + +template<typename T> +struct construct_by_exemplar: no_assign { + const T exemplar; + void construct(void*where) {new(where) T(exemplar);} + construct_by_exemplar( const T& t ) : exemplar(t) {} + construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {} +}; + +template<typename T, typename Finit> +struct construct_by_finit: no_assign { + Finit f; + void construct(void* where) {new(where) T(f());} + construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {} +}; + +template<typename T, typename... P> +struct construct_by_args: no_assign { + stored_pack<P...> pack; + void construct(void* where) { + call( [where](const typename std::decay<P>::type&... args ){ + new(where) T(args...); + }, pack ); + } + construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {} +}; + +// storage for initialization function pointer +// TODO: consider removing the template parameter T here and in callback_leaf +class callback_base { +public: + // Clone *this + virtual callback_base* clone() const = 0; + // Destruct and free *this + virtual void destroy() = 0; + // Need virtual destructor to satisfy GCC compiler warning + virtual ~callback_base() { } + // Construct T at where + virtual void construct(void* where) = 0; +}; + +template <typename Constructor> +class callback_leaf: public callback_base, Constructor { + template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {} + // TODO: make the construction/destruction consistent (use allocator.construct/destroy) + using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>; + + callback_base* clone() const override { + return make(*this); + } + + void destroy() override { + my_allocator_type alloc; + tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this); + tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1); + } + + void construct(void* where) override { + Constructor::construct(where); + } + +public: + template<typename... P> + static callback_base* make( P&& ... params ) { + void* where = my_allocator_type().allocate(1); + return new(where) callback_leaf( std::forward<P>(params)... ); + } +}; + +//! Template for recording construction of objects in table +/** All maintenance of the space will be done explicitly on push_back, + and all thread local copies must be destroyed before the concurrent + vector is deleted. + + The flag is_built is initialized to false. When the local is + successfully-constructed, set the flag to true or call value_committed(). + If the constructor throws, the flag will be false. +*/ +template<typename U> +struct ets_element { + detail::aligned_space<U> my_space; + bool is_built; + ets_element() { is_built = false; } // not currently-built + U* value() { return my_space.begin(); } + U* value_committed() { is_built = true; return my_space.begin(); } + ~ets_element() { + if(is_built) { + my_space.begin()->~U(); + is_built = false; + } + } +}; + +// A predicate that can be used for a compile-time compatibility check of ETS instances +// Ideally, it should have been declared inside the ETS class, but unfortunately +// in that case VS2013 does not enable the variadic constructor. 
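As a brief illustrative aside (not part of this header or of the commit), the construct_by_default / construct_by_exemplar / construct_by_finit / construct_by_args helpers above correspond one-to-one to the public constructors of enumerable_thread_specific declared further below. A minimal sketch, assuming the usual oneapi/tbb include path; all variable and function names here are invented for the example:

#include "oneapi/tbb/enumerable_thread_specific.h"
#include <vector>

void construction_flavors() {
    // construct_by_default: each thread's copy is value-initialized (int() == 0).
    tbb::enumerable_thread_specific<int> by_default;

    // construct_by_exemplar: each thread's copy is copy-constructed from the exemplar.
    std::vector<int> exemplar(4, -1);
    tbb::enumerable_thread_specific<std::vector<int>> by_exemplar(exemplar);

    // construct_by_finit: each thread's copy is built from the functor's return value.
    tbb::enumerable_thread_specific<int> by_finit([] { return 42; });

    // construct_by_args: each thread's copy is built as std::vector<int>(8u, 0).
    tbb::enumerable_thread_specific<std::vector<int>> by_args(8u, 0);

    // local() lazily creates the calling thread's copy on first use and
    // returns the same copy on every later call from that thread.
    by_default.local() += 1;
}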
+template<typename T, typename ETS> struct is_compatible_ets : std::false_type {}; +template<typename T, typename U, typename A, ets_key_usage_type C> +struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {}; + +// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression +template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()()); +template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>; + +//! The enumerable_thread_specific container +/** enumerable_thread_specific has the following properties: + - thread-local copies are lazily created, with default, exemplar or function initialization. + - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant. + - the contained objects need not have operator=() defined if combine is not used. + - enumerable_thread_specific containers may be copy-constructed or assigned. + - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed. + - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods + +@par Segmented iterator + When the thread-local objects are containers with input_iterators defined, a segmented iterator may + be used to iterate over all the elements of all thread-local copies. + +@par combine and combine_each + - Both methods are defined for enumerable_thread_specific. + - combine() requires the type T have operator=() defined. + - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.) + - Both are evaluated in serial context (the methods are assumed to be non-benign.) + +@ingroup containers */ +template <typename T, typename Allocator=cache_aligned_allocator<T>, + ets_key_usage_type ETS_key_type=ets_no_key > +class enumerable_thread_specific: ets_base<ETS_key_type> { + + template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific; + + using padded_element = padded<ets_element<T>>; + + //! 
A generic range, used to create range objects from the iterators + template<typename I> + class generic_range_type: public blocked_range<I> { + public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + using iterator = I; + using difference_type = std::ptrdiff_t; + + generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} + template<typename U> + generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} + generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {} + }; + + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + + using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>; + using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >; + + callback_base *my_construct_callback; + + internal_collection_type my_locals; + + // TODO: consider unifying the callback mechanism for all create_local* methods below + // (likely non-compatible and requires interface version increase) + void* create_local() override { + padded_element& lref = *my_locals.grow_by(1); + my_construct_callback->construct(lref.value()); + return lref.value_committed(); + } + + static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) { + enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); + padded_element& lref = *ets.my_locals.grow_by(1); + new(lref.value()) T(*static_cast<T*>(p)); + return lref.value_committed(); + } + + static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) { + enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); + padded_element& lref = *ets.my_locals.grow_by(1); + new(lref.value()) T(std::move(*static_cast<T*>(p))); + return lref.value_committed(); + } + + using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>; + + // _size is in bytes + void* create_array(std::size_t _size) override { + std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); + return array_allocator_type().allocate(nelements); + } + + void free_array( void* _ptr, std::size_t _size) override { + std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); + array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements); + } + +public: + + //! Basic types + using value_type = T; + using allocator_type = Allocator; + using size_type = typename internal_collection_type::size_type; + using difference_type = typename internal_collection_type::difference_type; + using reference = value_type&; + using const_reference = const value_type&; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + // Iterator types + using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>; + using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>; + + // Parallel range types + using range_type = generic_range_type<iterator>; + using const_range_type = generic_range_type<const_iterator>; + + //! Default constructor. Each local instance of T is default constructed. + enumerable_thread_specific() : my_construct_callback( + callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0) + ){} + + //! Constructor with initializer functor. 
Each local instance of T is constructed by T(finit()). + template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type> + explicit enumerable_thread_specific( Finit finit ) : my_construct_callback( + callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) ) + ){} + + //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar. + explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback( + callback_leaf<construct_by_exemplar<T> >::make( exemplar ) + ){} + + explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback( + callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) ) + ){} + + //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...) + template <typename P1, typename... P, + typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value + && !is_compatible_ets<T, typename std::decay<P1>::type>::value + && !std::is_same<T, typename std::decay<P1>::type>::value + >::type> + enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback( + callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... ) + ){} + + //! Destructor + ~enumerable_thread_specific() { + if(my_construct_callback) my_construct_callback->destroy(); + // Deallocate the hash table before overridden free_array() becomes inaccessible + this->ets_base<ETS_key_type>::table_clear(); + } + + //! returns reference to local, discarding exists + reference local() { + bool exists; + return local(exists); + } + + //! Returns reference to calling thread's local copy, creating one if necessary + reference local(bool& exists) { + void* ptr = this->table_lookup(exists); + return *(T*)ptr; + } + + //! Get the number of local copies + size_type size() const { return my_locals.size(); } + + //! true if there have been no local copies created + bool empty() const { return my_locals.empty(); } + + //! begin iterator + iterator begin() { return iterator( my_locals, 0 ); } + //! end iterator + iterator end() { return iterator(my_locals, my_locals.size() ); } + + //! begin const iterator + const_iterator begin() const { return const_iterator(my_locals, 0); } + + //! end const iterator + const_iterator end() const { return const_iterator(my_locals, my_locals.size()); } + + //! Get range for parallel algorithms + range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } + + //! Get const range for parallel algorithms + const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); } + + //! Destroys local copies + void clear() { + my_locals.clear(); + this->table_clear(); + // callback is not destroyed + } + +private: + template<typename A2, ets_key_usage_type C2> + void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) { + // this tests is_compatible_ets + static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); + // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception. 
+ my_construct_callback = other.my_construct_callback->clone(); + __TBB_ASSERT(my_locals.size()==0,NULL); + my_locals.reserve(other.size()); + this->table_elementwise_copy( other, create_local_by_copy ); + } + + void internal_swap(enumerable_thread_specific& other) { + using std::swap; + __TBB_ASSERT( this!=&other, NULL ); + swap(my_construct_callback, other.my_construct_callback); + // concurrent_vector::swap() preserves storage space, + // so addresses to the vector kept in ETS hash table remain valid. + swap(my_locals, other.my_locals); + this->ets_base<ETS_key_type>::table_swap(other); + } + + template<typename A2, ets_key_usage_type C2> + void internal_move(enumerable_thread_specific<T, A2, C2>&& other) { + static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); + my_construct_callback = other.my_construct_callback; + other.my_construct_callback = nullptr; + __TBB_ASSERT(my_locals.size()==0,NULL); + my_locals.reserve(other.size()); + this->table_elementwise_copy( other, create_local_by_move ); + } + +public: + enumerable_thread_specific( const enumerable_thread_specific& other ) + : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */ + { + internal_copy(other); + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) + { + internal_copy(other); + } + + enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() + { + // TODO: use internal_move correctly here + internal_swap(other); + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback() + { + internal_move(std::move(other)); + } + + enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) + { + if( this != &other ) { + this->clear(); + my_construct_callback->destroy(); + internal_copy( other ); + } + return *this; + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) + { + __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types + this->clear(); + my_construct_callback->destroy(); + internal_copy(other); + return *this; + } + + enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) + { + if( this != &other ) { + // TODO: use internal_move correctly here + internal_swap(other); + } + return *this; + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) + { + __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types + this->clear(); + my_construct_callback->destroy(); + internal_move(std::move(other)); + return *this; + } + + // CombineFunc has signature T(T,T) or T(const T&, const T&) + template <typename CombineFunc> + T combine(CombineFunc f_combine) { + if(begin() == end()) { + ets_element<T> location; + my_construct_callback->construct(location.value()); + return *location.value_committed(); + } + const_iterator ci = begin(); + T my_result = *ci; + while(++ci != end()) + my_result = f_combine( my_result, *ci ); + return my_result; + } + + // combine_func_t takes T by value or by [const] reference, and returns nothing + template <typename 
CombineFunc> + void combine_each(CombineFunc f_combine) { + for(iterator ci = begin(); ci != end(); ++ci) { + f_combine( *ci ); + } + } + +}; // enumerable_thread_specific + +template< typename Container > +class flattened2d { + // This intermediate typedef is to address issues with VC7.1 compilers + using conval_type = typename Container::value_type; + +public: + //! Basic types + using size_type = typename conval_type::size_type; + using difference_type = typename conval_type::difference_type; + using allocator_type = typename conval_type::allocator_type; + using value_type = typename conval_type::value_type; + using reference = typename conval_type::reference; + using const_reference = typename conval_type::const_reference; + using pointer = typename conval_type::pointer; + using const_pointer = typename conval_type::const_pointer; + + using iterator = segmented_iterator<Container, value_type>; + using const_iterator = segmented_iterator<Container, const value_type>; + + flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) : + my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { } + + explicit flattened2d( const Container &c ) : + my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { } + + iterator begin() { return iterator(*my_container) = my_begin; } + iterator end() { return iterator(*my_container) = my_end; } + const_iterator begin() const { return const_iterator(*my_container) = my_begin; } + const_iterator end() const { return const_iterator(*my_container) = my_end; } + + size_type size() const { + size_type tot_size = 0; + for(typename Container::const_iterator i = my_begin; i != my_end; ++i) { + tot_size += i->size(); + } + return tot_size; + } + +private: + Container *my_container; + typename Container::const_iterator my_begin; + typename Container::const_iterator my_end; +}; + +template <typename Container> +flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) { + return flattened2d<Container>(c, b, e); +} + +template <typename Container> +flattened2d<Container> flatten2d(const Container &c) { + return flattened2d<Container>(c); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::enumerable_thread_specific; +using detail::d1::flattened2d; +using detail::d1::flatten2d; +// ets enum keys +using detail::d1::ets_key_usage_type; +using detail::d1::ets_key_per_instance; +using detail::d1::ets_no_key; +#if __TBB_RESUMABLE_TASKS +using detail::d1::ets_suspend_aware; +#endif +} // inline namespace v1 + +} // namespace tbb + +#endif // __TBB_enumerable_thread_specific_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h b/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h index cc2cc7b605..45dc7191fc 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h +++ b/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h @@ -1,3221 +1,3221 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_flow_graph_H -#define __TBB_flow_graph_H - -#include <atomic> -#include <memory> -#include <type_traits> - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "spin_mutex.h" -#include "null_mutex.h" -#include "spin_rw_mutex.h" -#include "null_rw_mutex.h" -#include "detail/_pipeline_filters.h" -#include "detail/_task.h" -#include "detail/_small_object_pool.h" -#include "cache_aligned_allocator.h" -#include "detail/_exception.h" -#include "detail/_template_helpers.h" -#include "detail/_aggregator.h" -#include "detail/_allocator_traits.h" -#include "profiling.h" -#include "task_arena.h" - -#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) - #if __INTEL_COMPILER - // Disabled warning "routine is both inline and noinline" - #pragma warning (push) - #pragma warning( disable: 2196 ) - #endif - #define __TBB_NOINLINE_SYM __attribute__((noinline)) -#else - #define __TBB_NOINLINE_SYM -#endif - -#include <tuple> -#include <list> -#include <queue> - -/** @file - \brief The graph related classes and functions - - There are some applications that best express dependencies as messages - passed between nodes in a graph. These messages may contain data or - simply act as signals that a predecessors has completed. The graph - class and its associated node classes can be used to express such - applications. -*/ - -namespace tbb { -namespace detail { - -namespace d1 { - -//! An enumeration the provides the two most common concurrency levels: unlimited and serial -enum concurrency { unlimited = 0, serial = 1 }; - -//! A generic null type -struct null_type {}; - -//! An empty class used for messages that mean "I'm done" -class continue_msg {}; - -//! Forward declaration section -template< typename T > class sender; -template< typename T > class receiver; -class continue_receiver; - -template< typename T, typename U > class limiter_node; // needed for resetting decrementer - -template<typename T, typename M> class successor_cache; -template<typename T, typename M> class broadcast_cache; -template<typename T, typename M> class round_robin_cache; -template<typename T, typename M> class predecessor_cache; -template<typename T, typename M> class reservable_predecessor_cache; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -namespace order { -struct following; -struct preceding; -} -template<typename Order, typename... Args> struct node_set; -#endif - - -} // namespace d1 -} // namespace detail -} // namespace tbb - -//! The graph class -#include "detail/_flow_graph_impl.h" - -namespace tbb { -namespace detail { -namespace d1 { - -static inline std::pair<graph_task*, graph_task*> order_tasks(graph_task* first, graph_task* second) { - if (second->priority > first->priority) - return std::make_pair(second, first); - return std::make_pair(first, second); -} - -// submit task if necessary. Returns the non-enqueued task if there is one. -static inline graph_task* combine_tasks(graph& g, graph_task* left, graph_task* right) { - // if no RHS task, don't change left. - if (right == NULL) return left; - // right != NULL - if (left == NULL) return right; - if (left == SUCCESSFULLY_ENQUEUED) return right; - // left contains a task - if (right != SUCCESSFULLY_ENQUEUED) { - // both are valid tasks - auto tasks_pair = order_tasks(left, right); - spawn_in_graph_arena(g, *tasks_pair.first); - return tasks_pair.second; - } - return left; -} - -//! 
Pure virtual template class that defines a sender of messages of type T -template< typename T > -class sender { -public: - virtual ~sender() {} - - //! Request an item from the sender - virtual bool try_get( T & ) { return false; } - - //! Reserves an item in the sender - virtual bool try_reserve( T & ) { return false; } - - //! Releases the reserved item - virtual bool try_release( ) { return false; } - - //! Consumes the reserved item - virtual bool try_consume( ) { return false; } - -protected: - //! The output type of this sender - typedef T output_type; - - //! The successor type for this node - typedef receiver<T> successor_type; - - //! Add a new successor to this node - virtual bool register_successor( successor_type &r ) = 0; - - //! Removes a successor from this node - virtual bool remove_successor( successor_type &r ) = 0; - - template<typename C> - friend bool register_successor(sender<C>& s, receiver<C>& r); - - template<typename C> - friend bool remove_successor (sender<C>& s, receiver<C>& r); -}; // class sender<T> - -template<typename C> -bool register_successor(sender<C>& s, receiver<C>& r) { - return s.register_successor(r); -} - -template<typename C> -bool remove_successor(sender<C>& s, receiver<C>& r) { - return s.remove_successor(r); -} - -//! Pure virtual template class that defines a receiver of messages of type T -template< typename T > -class receiver { -public: - //! Destructor - virtual ~receiver() {} - - //! Put an item to the receiver - bool try_put( const T& t ) { - graph_task *res = try_put_task(t); - if (!res) return false; - if (res != SUCCESSFULLY_ENQUEUED) spawn_in_graph_arena(graph_reference(), *res); - return true; - } - - //! put item to successor; return task to run the successor if possible. -protected: - //! The input type of this receiver - typedef T input_type; - - //! The predecessor type for this node - typedef sender<T> predecessor_type; - - template< typename R, typename B > friend class run_and_put_task; - template< typename X, typename Y > friend class broadcast_cache; - template< typename X, typename Y > friend class round_robin_cache; - virtual graph_task *try_put_task(const T& t) = 0; - virtual graph& graph_reference() const = 0; - - template<typename TT, typename M> friend class successor_cache; - virtual bool is_continue_receiver() { return false; } - - // TODO revamp: reconsider the inheritance and move node priority out of receiver - virtual node_priority_t priority() const { return no_priority; } - - //! Add a predecessor to the node - virtual bool register_predecessor( predecessor_type & ) { return false; } - - //! Remove a predecessor from the node - virtual bool remove_predecessor( predecessor_type & ) { return false; } - - template <typename C> - friend bool register_predecessor(receiver<C>& r, sender<C>& s); - template <typename C> - friend bool remove_predecessor (receiver<C>& r, sender<C>& s); -}; // class receiver<T> - -template <typename C> -bool register_predecessor(receiver<C>& r, sender<C>& s) { - return r.register_predecessor(s); -} - -template <typename C> -bool remove_predecessor(receiver<C>& r, sender<C>& s) { - return r.remove_predecessor(s); -} - -//! Base class for receivers of completion messages -/** These receivers automatically reset, but cannot be explicitly waited on */ -class continue_receiver : public receiver< continue_msg > { -protected: - - //! 
Constructor - explicit continue_receiver( int number_of_predecessors, node_priority_t a_priority ) { - my_predecessor_count = my_initial_predecessor_count = number_of_predecessors; - my_current_count = 0; - my_priority = a_priority; - } - - //! Copy constructor - continue_receiver( const continue_receiver& src ) : receiver<continue_msg>() { - my_predecessor_count = my_initial_predecessor_count = src.my_initial_predecessor_count; - my_current_count = 0; - my_priority = src.my_priority; - } - - //! Increments the trigger threshold - bool register_predecessor( predecessor_type & ) override { - spin_mutex::scoped_lock l(my_mutex); - ++my_predecessor_count; - return true; - } - - //! Decrements the trigger threshold - /** Does not check to see if the removal of the predecessor now makes the current count - exceed the new threshold. So removing a predecessor while the graph is active can cause - unexpected results. */ - bool remove_predecessor( predecessor_type & ) override { - spin_mutex::scoped_lock l(my_mutex); - --my_predecessor_count; - return true; - } - - //! The input type - typedef continue_msg input_type; - - //! The predecessor type for this node - typedef receiver<input_type>::predecessor_type predecessor_type; - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - // execute body is supposed to be too small to create a task for. - graph_task* try_put_task( const input_type & ) override { - { - spin_mutex::scoped_lock l(my_mutex); - if ( ++my_current_count < my_predecessor_count ) - return SUCCESSFULLY_ENQUEUED; - else - my_current_count = 0; - } - graph_task* res = execute(); - return res? res : SUCCESSFULLY_ENQUEUED; - } - - spin_mutex my_mutex; - int my_predecessor_count; - int my_current_count; - int my_initial_predecessor_count; - node_priority_t my_priority; - // the friend declaration in the base class did not eliminate the "protected class" - // error in gcc 4.1.2 - template<typename U, typename V> friend class limiter_node; - - virtual void reset_receiver( reset_flags f ) { - my_current_count = 0; - if (f & rf_clear_edges) { - my_predecessor_count = my_initial_predecessor_count; - } - } - - //! Does whatever should happen when the threshold is reached - /** This should be very fast or else spawn a task. This is - called while the sender is blocked in the try_put(). 
*/ - virtual graph_task* execute() = 0; - template<typename TT, typename M> friend class successor_cache; - bool is_continue_receiver() override { return true; } - - node_priority_t priority() const override { return my_priority; } -}; // class continue_receiver - -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - template <typename K, typename T> - K key_from_message( const T &t ) { - return t.key(); - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - -} // d1 -} // detail -} // tbb - -#include "detail/_flow_graph_trace_impl.h" -#include "detail/_hash_compare.h" - -namespace tbb { -namespace detail { -namespace d1 { - -#include "detail/_flow_graph_body_impl.h" -#include "detail/_flow_graph_cache_impl.h" -#include "detail/_flow_graph_types_impl.h" - -using namespace graph_policy_namespace; - -template <typename C, typename N> -graph_iterator<C,N>::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL) -{ - if (begin) current_node = my_graph->my_nodes; - //else it is an end iterator by default -} - -template <typename C, typename N> -typename graph_iterator<C,N>::reference graph_iterator<C,N>::operator*() const { - __TBB_ASSERT(current_node, "graph_iterator at end"); - return *operator->(); -} - -template <typename C, typename N> -typename graph_iterator<C,N>::pointer graph_iterator<C,N>::operator->() const { - return current_node; -} - -template <typename C, typename N> -void graph_iterator<C,N>::internal_forward() { - if (current_node) current_node = current_node->next; -} - -//! Constructs a graph with isolated task_group_context -inline graph::graph() : my_wait_context(0), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { - prepare_task_arena(); - own_context = true; - cancelled = false; - caught_exception = false; - my_context = new (r1::cache_aligned_allocate(sizeof(task_group_context))) task_group_context(FLOW_TASKS); - fgt_graph(this); - my_is_active = true; -} - -inline graph::graph(task_group_context& use_this_context) : - my_wait_context(0), my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { - prepare_task_arena(); - own_context = false; - cancelled = false; - caught_exception = false; - fgt_graph(this); - my_is_active = true; -} - -inline graph::~graph() { - wait_for_all(); - if (own_context) { - my_context->~task_group_context(); - r1::cache_aligned_deallocate(my_context); - } - delete my_task_arena; -} - -inline void graph::reserve_wait() { - my_wait_context.reserve(); - fgt_reserve_wait(this); -} - -inline void graph::release_wait() { - fgt_release_wait(this); - my_wait_context.release(); -} - -inline void graph::register_node(graph_node *n) { - n->next = NULL; - { - spin_mutex::scoped_lock lock(nodelist_mutex); - n->prev = my_nodes_last; - if (my_nodes_last) my_nodes_last->next = n; - my_nodes_last = n; - if (!my_nodes) my_nodes = n; - } -} - -inline void graph::remove_node(graph_node *n) { - { - spin_mutex::scoped_lock lock(nodelist_mutex); - __TBB_ASSERT(my_nodes && my_nodes_last, "graph::remove_node: Error: no registered nodes"); - if (n->prev) n->prev->next = n->next; - if (n->next) n->next->prev = n->prev; - if (my_nodes_last == n) my_nodes_last = n->prev; - if (my_nodes == n) my_nodes = n->next; - } - n->prev = n->next = NULL; -} - -inline void graph::reset( reset_flags f ) { - // reset context - deactivate_graph(*this); - - my_context->reset(); - cancelled = false; - caught_exception = false; - // reset all the nodes comprising the graph - for(iterator ii = begin(); ii != end(); ++ii) { - graph_node 
*my_p = &(*ii); - my_p->reset_node(f); - } - // Reattach the arena. Might be useful to run the graph in a particular task_arena - // while not limiting graph lifetime to a single task_arena::execute() call. - prepare_task_arena( /*reinit=*/true ); - activate_graph(*this); -} - -inline void graph::cancel() { - my_context->cancel_group_execution(); -} - -inline graph::iterator graph::begin() { return iterator(this, true); } - -inline graph::iterator graph::end() { return iterator(this, false); } - -inline graph::const_iterator graph::begin() const { return const_iterator(this, true); } - -inline graph::const_iterator graph::end() const { return const_iterator(this, false); } - -inline graph::const_iterator graph::cbegin() const { return const_iterator(this, true); } - -inline graph::const_iterator graph::cend() const { return const_iterator(this, false); } - -inline graph_node::graph_node(graph& g) : my_graph(g) { - my_graph.register_node(this); -} - -inline graph_node::~graph_node() { - my_graph.remove_node(this); -} - -#include "detail/_flow_graph_node_impl.h" - - -//! An executable node that acts as a source, i.e. it has no predecessors - -template < typename Output > -class input_node : public graph_node, public sender< Output > { -public: - //! The type of the output message, which is complete - typedef Output output_type; - - //! The type of successors of this node - typedef typename sender<output_type>::successor_type successor_type; - - // Input node has no input type - typedef null_type input_type; - - //! Constructor for a node with a successor - template< typename Body > - __TBB_NOINLINE_SYM input_node( graph &g, Body body ) - : graph_node(g), my_active(false) - , my_body( new input_body_leaf< output_type, Body>(body) ) - , my_init_body( new input_body_leaf< output_type, Body>(body) ) - , my_successors(this), my_reserved(false), my_has_cached_item(false) - { - fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, - static_cast<sender<output_type> *>(this), this->my_body); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... Successors> - input_node( const node_set<order::preceding, Successors...>& successors, Body body ) - : input_node(successors.graph_reference(), body) - { - make_edges(*this, successors); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM input_node( const input_node& src ) - : graph_node(src.my_graph), sender<Output>() - , my_active(false) - , my_body(src.my_init_body->clone()), my_init_body(src.my_init_body->clone()) - , my_successors(this), my_reserved(false), my_has_cached_item(false) - { - fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, - static_cast<sender<output_type> *>(this), this->my_body); - } - - //! The destructor - ~input_node() { delete my_body; delete my_init_body; } - - //! Add a new successor to this node - bool register_successor( successor_type &r ) override { - spin_mutex::scoped_lock lock(my_mutex); - my_successors.register_successor(r); - if ( my_active ) - spawn_put(); - return true; - } - - //! Removes a successor from this node - bool remove_successor( successor_type &r ) override { - spin_mutex::scoped_lock lock(my_mutex); - my_successors.remove_successor(r); - return true; - } - - //! 
Request an item from the node - bool try_get( output_type &v ) override { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) - return false; - - if ( my_has_cached_item ) { - v = my_cached_item; - my_has_cached_item = false; - return true; - } - // we've been asked to provide an item, but we have none. enqueue a task to - // provide one. - if ( my_active ) - spawn_put(); - return false; - } - - //! Reserves an item. - bool try_reserve( output_type &v ) override { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) { - return false; - } - - if ( my_has_cached_item ) { - v = my_cached_item; - my_reserved = true; - return true; - } else { - return false; - } - } - - //! Release a reserved item. - /** true = item has been released and so remains in sender, dest must request or reserve future items */ - bool try_release( ) override { - spin_mutex::scoped_lock lock(my_mutex); - __TBB_ASSERT( my_reserved && my_has_cached_item, "releasing non-existent reservation" ); - my_reserved = false; - if(!my_successors.empty()) - spawn_put(); - return true; - } - - //! Consumes a reserved item - bool try_consume( ) override { - spin_mutex::scoped_lock lock(my_mutex); - __TBB_ASSERT( my_reserved && my_has_cached_item, "consuming non-existent reservation" ); - my_reserved = false; - my_has_cached_item = false; - if ( !my_successors.empty() ) { - spawn_put(); - } - return true; - } - - //! Activates a node that was created in the inactive state - void activate() { - spin_mutex::scoped_lock lock(my_mutex); - my_active = true; - if (!my_successors.empty()) - spawn_put(); - } - - template<typename Body> - Body copy_function_object() { - input_body<output_type> &body_ref = *this->my_body; - return dynamic_cast< input_body_leaf<output_type, Body> & >(body_ref).get_body(); - } - -protected: - - //! resets the input_node to its initial state - void reset_node( reset_flags f) override { - my_active = false; - my_reserved = false; - my_has_cached_item = false; - - if(f & rf_clear_edges) my_successors.clear(); - if(f & rf_reset_bodies) { - input_body<output_type> *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - } - -private: - spin_mutex my_mutex; - bool my_active; - input_body<output_type> *my_body; - input_body<output_type> *my_init_body; - broadcast_cache< output_type > my_successors; - bool my_reserved; - bool my_has_cached_item; - output_type my_cached_item; - - // used by apply_body_bypass, can invoke body of node. - bool try_reserve_apply_body(output_type &v) { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) { - return false; - } - if ( !my_has_cached_item ) { - flow_control control; - - fgt_begin_body( my_body ); - - my_cached_item = (*my_body)(control); - my_has_cached_item = !control.is_pipeline_stopped; - - fgt_end_body( my_body ); - } - if ( my_has_cached_item ) { - v = my_cached_item; - my_reserved = true; - return true; - } else { - return false; - } - } - - graph_task* create_put_task() { - small_object_allocator allocator{}; - typedef input_node_task_bypass< input_node<output_type> > task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - return t; - } - - //! Spawns a task that applies the body - void spawn_put( ) { - if(is_graph_active(this->my_graph)) { - spawn_in_graph_arena(this->my_graph, *create_put_task()); - } - } - - friend class input_node_task_bypass< input_node<output_type> >; - //! Applies the body. 
Returning SUCCESSFULLY_ENQUEUED okay; forward_task_bypass will handle it. - graph_task* apply_body_bypass( ) { - output_type v; - if ( !try_reserve_apply_body(v) ) - return NULL; - - graph_task *last_task = my_successors.try_put_task(v); - if ( last_task ) - try_consume(); - else - try_release(); - return last_task; - } -}; // class input_node - -//! Implements a function node that supports Input -> Output -template<typename Input, typename Output = continue_msg, typename Policy = queueing> -class function_node - : public graph_node - , public function_input< Input, Output, Policy, cache_aligned_allocator<Input> > - , public function_output<Output> -{ - typedef cache_aligned_allocator<Input> internals_allocator; - -public: - typedef Input input_type; - typedef Output output_type; - typedef function_input<input_type,output_type,Policy,internals_allocator> input_impl_type; - typedef function_input_queue<input_type, internals_allocator> input_queue_type; - typedef function_output<output_type> fOutput_type; - typedef typename input_impl_type::predecessor_type predecessor_type; - typedef typename fOutput_type::successor_type successor_type; - - using input_impl_type::my_predecessors; - - //! Constructor - // input_queue_type is allocated here, but destroyed in the function_input_base. - // TODO: pass the graph_buffer_policy to the function_input_base so it can all - // be done in one place. This would be an interface-breaking change. - template< typename Body > - __TBB_NOINLINE_SYM function_node( graph &g, size_t concurrency, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) - : graph_node(g), input_impl_type(g, concurrency, body, a_priority), - fOutput_type(g) { - fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); - } - - template <typename Body> - function_node( graph& g, size_t concurrency, Body body, node_priority_t a_priority ) - : function_node(g, concurrency, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... Args> - function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, - Policy p = Policy(), node_priority_t a_priority = no_priority ) - : function_node(nodes.graph_reference(), concurrency, body, p, a_priority) { - make_edges_in_order(nodes, *this); - } - - template <typename Body, typename... Args> - function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority ) - : function_node(nodes, concurrency, body, Policy(), a_priority) {} -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - - //! Copy constructor - __TBB_NOINLINE_SYM function_node( const function_node& src ) : - graph_node(src.my_graph), - input_impl_type(src), - fOutput_type(src.my_graph) { - fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - using input_impl_type::try_put_task; - - broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } - - void reset_node(reset_flags f) override { - input_impl_type::reset_function_input(f); - // TODO: use clear() instead. 
- if(f & rf_clear_edges) { - successors().clear(); - my_predecessors.clear(); - } - __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "function_node successors not empty"); - __TBB_ASSERT(this->my_predecessors.empty(), "function_node predecessors not empty"); - } - -}; // class function_node - -//! implements a function node that supports Input -> (set of outputs) -// Output is a tuple of output types. -template<typename Input, typename Output, typename Policy = queueing> -class multifunction_node : - public graph_node, - public multifunction_input - < - Input, - typename wrap_tuple_elements< - std::tuple_size<Output>::value, // #elements in tuple - multifunction_output, // wrap this around each element - Output // the tuple providing the types - >::type, - Policy, - cache_aligned_allocator<Input> - > -{ - typedef cache_aligned_allocator<Input> internals_allocator; - -protected: - static const int N = std::tuple_size<Output>::value; -public: - typedef Input input_type; - typedef null_type output_type; - typedef typename wrap_tuple_elements<N,multifunction_output, Output>::type output_ports_type; - typedef multifunction_input< - input_type, output_ports_type, Policy, internals_allocator> input_impl_type; - typedef function_input_queue<input_type, internals_allocator> input_queue_type; -private: - using input_impl_type::my_predecessors; -public: - template<typename Body> - __TBB_NOINLINE_SYM multifunction_node( - graph &g, size_t concurrency, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority - ) : graph_node(g), input_impl_type(g, concurrency, body, a_priority) { - fgt_multioutput_node_with_body<N>( - CODEPTR(), FLOW_MULTIFUNCTION_NODE, - &this->my_graph, static_cast<receiver<input_type> *>(this), - this->output_ports(), this->my_body - ); - } - - template <typename Body> - __TBB_NOINLINE_SYM multifunction_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) - : multifunction_node(g, concurrency, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... Args> - __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, - Policy p = Policy(), node_priority_t a_priority = no_priority) - : multifunction_node(nodes.graph_reference(), concurrency, body, p, a_priority) { - make_edges_in_order(nodes, *this); - } - - template <typename Body, typename... Args> - __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) - : multifunction_node(nodes, concurrency, body, Policy(), a_priority) {} -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - - __TBB_NOINLINE_SYM multifunction_node( const multifunction_node &other) : - graph_node(other.my_graph), input_impl_type(other) { - fgt_multioutput_node_with_body<N>( CODEPTR(), FLOW_MULTIFUNCTION_NODE, - &this->my_graph, static_cast<receiver<input_type> *>(this), - this->output_ports(), this->my_body ); - } - - // all the guts are in multifunction_input... -protected: - void reset_node(reset_flags f) override { input_impl_type::reset(f); } -}; // multifunction_node - -//! split_node: accepts a tuple as input, forwards each element of the tuple to its -// successors. The node has unlimited concurrency, so it does not reject inputs. 
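To make the node types above concrete, here is a small illustrative graph that chains a function_node into a serial sink. The bodies and node names are assumptions, not taken from the diff; make_edge, try_put, and wait_for_all are the standard flow graph wiring calls from this header family.

#include <oneapi/tbb/flow_graph.h>
#include <cstdio>

int main() {
    using namespace tbb::flow;

    graph g;

    // Unlimited-concurrency transform: int -> int.
    function_node<int, int> square(g, unlimited, [](int v) { return v * v; });

    // Serial sink: prints results one at a time.
    function_node<int> printer(g, serial, [](int v) {
        std::printf("%d\n", v);
        return continue_msg{};
    });

    make_edge(square, printer);     // square's output feeds printer

    for (int i = 0; i < 5; ++i)
        square.try_put(i);          // inject work at the head of the graph

    g.wait_for_all();               // block until all spawned node bodies finish
    return 0;
}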
-template<typename TupleType> -class split_node : public graph_node, public receiver<TupleType> { - static const int N = std::tuple_size<TupleType>::value; - typedef receiver<TupleType> base_type; -public: - typedef TupleType input_type; - typedef typename wrap_tuple_elements< - N, // #elements in tuple - multifunction_output, // wrap this around each element - TupleType // the tuple providing the types - >::type output_ports_type; - - __TBB_NOINLINE_SYM explicit split_node(graph &g) - : graph_node(g), - my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)) - { - fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), this->output_ports()); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - __TBB_NOINLINE_SYM split_node(const node_set<Args...>& nodes) : split_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - __TBB_NOINLINE_SYM split_node(const split_node& other) - : graph_node(other.my_graph), base_type(other), - my_output_ports(init_output_ports<output_ports_type>::call(other.my_graph, my_output_ports)) - { - fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), this->output_ports()); - } - - output_ports_type &output_ports() { return my_output_ports; } - -protected: - graph_task *try_put_task(const TupleType& t) override { - // Sending split messages in parallel is not justified, as overheads would prevail. - // Also, we do not have successors here. So we just tell the task returned here is successful. - return emit_element<N>::emit_this(this->my_graph, t, output_ports()); - } - void reset_node(reset_flags f) override { - if (f & rf_clear_edges) - clear_element<N>::clear_this(my_output_ports); - - __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "split_node reset failed"); - } - graph& graph_reference() const override { - return my_graph; - } - -private: - output_ports_type my_output_ports; -}; - -//! Implements an executable node that supports continue_msg -> Output -template <typename Output, typename Policy = Policy<void> > -class continue_node : public graph_node, public continue_input<Output, Policy>, - public function_output<Output> { -public: - typedef continue_msg input_type; - typedef Output output_type; - typedef continue_input<Output, Policy> input_impl_type; - typedef function_output<output_type> fOutput_type; - typedef typename input_impl_type::predecessor_type predecessor_type; - typedef typename fOutput_type::successor_type successor_type; - - //! Constructor for executable node with continue_msg -> Output - template <typename Body > - __TBB_NOINLINE_SYM continue_node( - graph &g, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority - ) : graph_node(g), input_impl_type( g, body, a_priority ), - fOutput_type(g) { - fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, - - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this), this->my_body ); - } - - template <typename Body> - continue_node( graph& g, Body body, node_priority_t a_priority ) - : continue_node(g, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... 
Args> - continue_node( const node_set<Args...>& nodes, Body body, - Policy p = Policy(), node_priority_t a_priority = no_priority ) - : continue_node(nodes.graph_reference(), body, p, a_priority ) { - make_edges_in_order(nodes, *this); - } - template <typename Body, typename... Args> - continue_node( const node_set<Args...>& nodes, Body body, node_priority_t a_priority) - : continue_node(nodes, body, Policy(), a_priority) {} -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - - //! Constructor for executable node with continue_msg -> Output - template <typename Body > - __TBB_NOINLINE_SYM continue_node( - graph &g, int number_of_predecessors, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority - ) : graph_node(g) - , input_impl_type(g, number_of_predecessors, body, a_priority), - fOutput_type(g) { - fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this), this->my_body ); - } - - template <typename Body> - continue_node( graph& g, int number_of_predecessors, Body body, node_priority_t a_priority) - : continue_node(g, number_of_predecessors, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... Args> - continue_node( const node_set<Args...>& nodes, int number_of_predecessors, - Body body, Policy p = Policy(), node_priority_t a_priority = no_priority ) - : continue_node(nodes.graph_reference(), number_of_predecessors, body, p, a_priority) { - make_edges_in_order(nodes, *this); - } - - template <typename Body, typename... Args> - continue_node( const node_set<Args...>& nodes, int number_of_predecessors, - Body body, node_priority_t a_priority ) - : continue_node(nodes, number_of_predecessors, body, Policy(), a_priority) {} -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM continue_node( const continue_node& src ) : - graph_node(src.my_graph), input_impl_type(src), - function_output<Output>(src.my_graph) { - fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this), this->my_body ); - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - using input_impl_type::try_put_task; - broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } - - void reset_node(reset_flags f) override { - input_impl_type::reset_receiver(f); - if(f & rf_clear_edges)successors().clear(); - __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "continue_node not reset"); - } -}; // continue_node - -//! Forwards messages of type T to all successors -template <typename T> -class broadcast_node : public graph_node, public receiver<T>, public sender<T> { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; -private: - broadcast_cache<input_type> my_successors; -public: - - __TBB_NOINLINE_SYM explicit broadcast_node(graph& g) : graph_node(g), my_successors(this) { - fgt_node( CODEPTR(), FLOW_BROADCAST_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - broadcast_node(const node_set<Args...>& nodes) : broadcast_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM broadcast_node( const broadcast_node& src ) : broadcast_node(src.my_graph) {} - - //! Adds a successor - bool register_successor( successor_type &r ) override { - my_successors.register_successor( r ); - return true; - } - - //! Removes s as a successor - bool remove_successor( successor_type &r ) override { - my_successors.remove_successor( r ); - return true; - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - //! build a task to run the successor if possible. Default is old behavior. - graph_task *try_put_task(const T& t) override { - graph_task *new_task = my_successors.try_put_task(t); - if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; - return new_task; - } - - graph& graph_reference() const override { - return my_graph; - } - - void reset_node(reset_flags f) override { - if (f&rf_clear_edges) { - my_successors.clear(); - } - __TBB_ASSERT(!(f & rf_clear_edges) || my_successors.empty(), "Error resetting broadcast_node"); - } -}; // broadcast_node - -//! Forwards messages in arbitrary order -template <typename T> -class buffer_node - : public graph_node - , public reservable_item_buffer< T, cache_aligned_allocator<T> > - , public receiver<T>, public sender<T> -{ - typedef cache_aligned_allocator<T> internals_allocator; - -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - typedef buffer_node<T> class_type; - -protected: - typedef size_t size_type; - round_robin_cache< T, null_rw_mutex > my_successors; - - friend class forward_task_bypass< class_type >; - - enum op_type {reg_succ, rem_succ, req_item, res_item, rel_res, con_res, put_item, try_fwd_task - }; - - // implements the aggregator_operation concept - class buffer_operation : public aggregated_operation< buffer_operation > { - public: - char type; - T* elem; - graph_task* ltask; - successor_type *r; - - buffer_operation(const T& e, op_type t) : type(char(t)) - , elem(const_cast<T*>(&e)) , ltask(NULL) - {} - buffer_operation(op_type t) : type(char(t)), ltask(NULL) {} - }; - - bool forwarder_busy; - typedef aggregating_functor<class_type, buffer_operation> handler_type; - friend class aggregating_functor<class_type, buffer_operation>; - aggregator< handler_type, buffer_operation> my_aggregator; - - virtual void handle_operations(buffer_operation *op_list) { - handle_operations_impl(op_list, this); - } - - template<typename derived_type> - void handle_operations_impl(buffer_operation *op_list, derived_type* derived) { - __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); - - buffer_operation *tmp = NULL; - bool try_forwarding = false; - while (op_list) { - tmp = op_list; - op_list = op_list->next; - switch (tmp->type) { - case reg_succ: internal_reg_succ(tmp); try_forwarding = true; break; - case rem_succ: internal_rem_succ(tmp); break; - case req_item: internal_pop(tmp); break; - case res_item: internal_reserve(tmp); break; - case rel_res: internal_release(tmp); try_forwarding = true; break; - case con_res: internal_consume(tmp); try_forwarding = true; break; - case put_item: 
try_forwarding = internal_push(tmp); break; - case try_fwd_task: internal_forward_task(tmp); break; - } - } - - derived->order(); - - if (try_forwarding && !forwarder_busy) { - if(is_graph_active(this->my_graph)) { - forwarder_busy = true; - typedef forward_task_bypass<class_type> task_type; - small_object_allocator allocator{}; - graph_task* new_task = allocator.new_object<task_type>(graph_reference(), allocator, *this); - my_graph.reserve_wait(); - // tmp should point to the last item handled by the aggregator. This is the operation - // the handling thread enqueued. So modifying that record will be okay. - // TODO revamp: check that the issue is still present - // workaround for icc bug (at least 12.0 and 13.0) - // error: function "tbb::flow::interfaceX::combine_tasks" cannot be called with the given argument list - // argument types are: (graph, graph_task *, graph_task *) - graph_task *z = tmp->ltask; - graph &g = this->my_graph; - tmp->ltask = combine_tasks(g, z, new_task); // in case the op generated a task - } - } - } // handle_operations - - inline graph_task *grab_forwarding_task( buffer_operation &op_data) { - return op_data.ltask; - } - - inline bool enqueue_forwarding_task(buffer_operation &op_data) { - graph_task *ft = grab_forwarding_task(op_data); - if(ft) { - spawn_in_graph_arena(graph_reference(), *ft); - return true; - } - return false; - } - - //! This is executed by an enqueued task, the "forwarder" - virtual graph_task *forward_task() { - buffer_operation op_data(try_fwd_task); - graph_task *last_task = NULL; - do { - op_data.status = WAIT; - op_data.ltask = NULL; - my_aggregator.execute(&op_data); - - // workaround for icc bug - graph_task *xtask = op_data.ltask; - graph& g = this->my_graph; - last_task = combine_tasks(g, last_task, xtask); - } while (op_data.status ==SUCCEEDED); - return last_task; - } - - //! Register successor - virtual void internal_reg_succ(buffer_operation *op) { - my_successors.register_successor(*(op->r)); - op->status.store(SUCCEEDED, std::memory_order_release); - } - - //! Remove successor - virtual void internal_rem_succ(buffer_operation *op) { - my_successors.remove_successor(*(op->r)); - op->status.store(SUCCEEDED, std::memory_order_release); - } - -private: - void order() {} - - bool is_item_valid() { - return this->my_item_valid(this->my_tail - 1); - } - - void try_put_and_add_task(graph_task*& last_task) { - graph_task *new_task = my_successors.try_put_task(this->back()); - if (new_task) { - // workaround for icc bug - graph& g = this->my_graph; - last_task = combine_tasks(g, last_task, new_task); - this->destroy_back(); - } - } - -protected: - //! 
Tries to forward valid items to successors - virtual void internal_forward_task(buffer_operation *op) { - internal_forward_task_impl(op, this); - } - - template<typename derived_type> - void internal_forward_task_impl(buffer_operation *op, derived_type* derived) { - __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); - - if (this->my_reserved || !derived->is_item_valid()) { - op->status.store(FAILED, std::memory_order_release); - this->forwarder_busy = false; - return; - } - // Try forwarding, giving each successor a chance - graph_task* last_task = NULL; - size_type counter = my_successors.size(); - for (; counter > 0 && derived->is_item_valid(); --counter) - derived->try_put_and_add_task(last_task); - - op->ltask = last_task; // return task - if (last_task && !counter) { - op->status.store(SUCCEEDED, std::memory_order_release); - } - else { - op->status.store(FAILED, std::memory_order_release); - forwarder_busy = false; - } - } - - virtual bool internal_push(buffer_operation *op) { - this->push_back(*(op->elem)); - op->status.store(SUCCEEDED, std::memory_order_release); - return true; - } - - virtual void internal_pop(buffer_operation *op) { - if(this->pop_back(*(op->elem))) { - op->status.store(SUCCEEDED, std::memory_order_release); - } - else { - op->status.store(FAILED, std::memory_order_release); - } - } - - virtual void internal_reserve(buffer_operation *op) { - if(this->reserve_front(*(op->elem))) { - op->status.store(SUCCEEDED, std::memory_order_release); - } - else { - op->status.store(FAILED, std::memory_order_release); - } - } - - virtual void internal_consume(buffer_operation *op) { - this->consume_front(); - op->status.store(SUCCEEDED, std::memory_order_release); - } - - virtual void internal_release(buffer_operation *op) { - this->release_front(); - op->status.store(SUCCEEDED, std::memory_order_release); - } - -public: - //! Constructor - __TBB_NOINLINE_SYM explicit buffer_node( graph &g ) - : graph_node(g), reservable_item_buffer<T, internals_allocator>(), receiver<T>(), - sender<T>(), my_successors(this), forwarder_busy(false) - { - my_aggregator.initialize_handler(handler_type(this)); - fgt_node( CODEPTR(), FLOW_BUFFER_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - buffer_node(const node_set<Args...>& nodes) : buffer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM buffer_node( const buffer_node& src ) : buffer_node(src.my_graph) {} - - // - // message sender implementation - // - - //! Adds a new successor. - /** Adds successor r to the list of successors; may forward tasks. */ - bool register_successor( successor_type &r ) override { - buffer_operation op_data(reg_succ); - op_data.r = &r; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - - //! Removes a successor. - /** Removes successor r from the list of successors. - It also calls r.remove_predecessor(*this) to remove this node as a predecessor. 
*/ - bool remove_successor( successor_type &r ) override { - // TODO revamp: investigate why full qualification is necessary here - tbb::detail::d1::remove_predecessor(r, *this); - buffer_operation op_data(rem_succ); - op_data.r = &r; - my_aggregator.execute(&op_data); - // even though this operation does not cause a forward, if we are the handler, and - // a forward is scheduled, we may be the first to reach this point after the aggregator, - // and so should check for the task. - (void)enqueue_forwarding_task(op_data); - return true; - } - - //! Request an item from the buffer_node - /** true = v contains the returned item<BR> - false = no item has been returned */ - bool try_get( T &v ) override { - buffer_operation op_data(req_item); - op_data.elem = &v; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return (op_data.status==SUCCEEDED); - } - - //! Reserves an item. - /** false = no item can be reserved<BR> - true = an item is reserved */ - bool try_reserve( T &v ) override { - buffer_operation op_data(res_item); - op_data.elem = &v; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return (op_data.status==SUCCEEDED); - } - - //! Release a reserved item. - /** true = item has been released and so remains in sender */ - bool try_release() override { - buffer_operation op_data(rel_res); - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - - //! Consumes a reserved item. - /** true = item is removed from sender and reservation removed */ - bool try_consume() override { - buffer_operation op_data(con_res); - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - -protected: - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - //! receive an item, return a task *if possible - graph_task *try_put_task(const T &t) override { - buffer_operation op_data(t, put_item); - my_aggregator.execute(&op_data); - graph_task *ft = grab_forwarding_task(op_data); - // sequencer_nodes can return failure (if an item has been previously inserted) - // We have to spawn the returned task if our own operation fails. - - if(ft && op_data.status ==FAILED) { - // we haven't succeeded queueing the item, but for some reason the - // call returned a task (if another request resulted in a successful - // forward this could happen.) Queue the task and reset the pointer. - spawn_in_graph_arena(graph_reference(), *ft); ft = NULL; - } - else if(!ft && op_data.status ==SUCCEEDED) { - ft = SUCCESSFULLY_ENQUEUED; - } - return ft; - } - - graph& graph_reference() const override { - return my_graph; - } - -protected: - void reset_node( reset_flags f) override { - reservable_item_buffer<T, internals_allocator>::reset(); - // TODO: just clear structures - if (f&rf_clear_edges) { - my_successors.clear(); - } - forwarder_busy = false; - } -}; // buffer_node - -//! 
Forwards messages in FIFO order -template <typename T> -class queue_node : public buffer_node<T> { -protected: - typedef buffer_node<T> base_type; - typedef typename base_type::size_type size_type; - typedef typename base_type::buffer_operation queue_operation; - typedef queue_node class_type; - -private: - template<typename> friend class buffer_node; - - bool is_item_valid() { - return this->my_item_valid(this->my_head); - } - - void try_put_and_add_task(graph_task*& last_task) { - graph_task *new_task = this->my_successors.try_put_task(this->front()); - if (new_task) { - // workaround for icc bug - graph& graph_ref = this->graph_reference(); - last_task = combine_tasks(graph_ref, last_task, new_task); - this->destroy_front(); - } - } - -protected: - void internal_forward_task(queue_operation *op) override { - this->internal_forward_task_impl(op, this); - } - - void internal_pop(queue_operation *op) override { - if ( this->my_reserved || !this->my_item_valid(this->my_head)){ - op->status.store(FAILED, std::memory_order_release); - } - else { - this->pop_front(*(op->elem)); - op->status.store(SUCCEEDED, std::memory_order_release); - } - } - void internal_reserve(queue_operation *op) override { - if (this->my_reserved || !this->my_item_valid(this->my_head)) { - op->status.store(FAILED, std::memory_order_release); - } - else { - this->reserve_front(*(op->elem)); - op->status.store(SUCCEEDED, std::memory_order_release); - } - } - void internal_consume(queue_operation *op) override { - this->consume_front(); - op->status.store(SUCCEEDED, std::memory_order_release); - } - -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - //! Constructor - __TBB_NOINLINE_SYM explicit queue_node( graph &g ) : base_type(g) { - fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - queue_node( const node_set<Args...>& nodes) : queue_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM queue_node( const queue_node& src) : base_type(src) { - fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - - -protected: - void reset_node( reset_flags f) override { - base_type::reset_node(f); - } -}; // queue_node - -//! Forwards messages in sequence order -template <typename T> -class sequencer_node : public queue_node<T> { - function_body< T, size_t > *my_sequencer; - // my_sequencer should be a benign function and must be callable - // from a parallel context. Does this mean it needn't be reset? -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - //! 
Constructor - template< typename Sequencer > - __TBB_NOINLINE_SYM sequencer_node( graph &g, const Sequencer& s ) : queue_node<T>(g), - my_sequencer(new function_body_leaf< T, size_t, Sequencer>(s) ) { - fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Sequencer, typename... Args> - sequencer_node( const node_set<Args...>& nodes, const Sequencer& s) - : sequencer_node(nodes.graph_reference(), s) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM sequencer_node( const sequencer_node& src ) : queue_node<T>(src), - my_sequencer( src.my_sequencer->clone() ) { - fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - - //! Destructor - ~sequencer_node() { delete my_sequencer; } - -protected: - typedef typename buffer_node<T>::size_type size_type; - typedef typename buffer_node<T>::buffer_operation sequencer_operation; - -private: - bool internal_push(sequencer_operation *op) override { - size_type tag = (*my_sequencer)(*(op->elem)); -#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES - if (tag < this->my_head) { - // have already emitted a message with this tag - op->status.store(FAILED, std::memory_order_release); - return false; - } -#endif - // cannot modify this->my_tail now; the buffer would be inconsistent. - size_t new_tail = (tag+1 > this->my_tail) ? tag+1 : this->my_tail; - - if (this->size(new_tail) > this->capacity()) { - this->grow_my_array(this->size(new_tail)); - } - this->my_tail = new_tail; - - const op_stat res = this->place_item(tag, *(op->elem)) ? SUCCEEDED : FAILED; - op->status.store(res, std::memory_order_release); - return res ==SUCCEEDED; - } -}; // sequencer_node - -//! Forwards messages in priority order -template<typename T, typename Compare = std::less<T>> -class priority_queue_node : public buffer_node<T> { -public: - typedef T input_type; - typedef T output_type; - typedef buffer_node<T> base_type; - typedef priority_queue_node class_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - //! Constructor - __TBB_NOINLINE_SYM explicit priority_queue_node( graph &g, const Compare& comp = Compare() ) - : buffer_node<T>(g), compare(comp), mark(0) { - fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - priority_queue_node(const node_set<Args...>& nodes, const Compare& comp = Compare()) - : priority_queue_node(nodes.graph_reference(), comp) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM priority_queue_node( const priority_queue_node &src ) - : buffer_node<T>(src), mark(0) - { - fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -protected: - - void reset_node( reset_flags f) override { - mark = 0; - base_type::reset_node(f); - } - - typedef typename buffer_node<T>::size_type size_type; - typedef typename buffer_node<T>::item_type item_type; - typedef typename buffer_node<T>::buffer_operation prio_operation; - - //! 
Tries to forward valid items to successors - void internal_forward_task(prio_operation *op) override { - this->internal_forward_task_impl(op, this); - } - - void handle_operations(prio_operation *op_list) override { - this->handle_operations_impl(op_list, this); - } - - bool internal_push(prio_operation *op) override { - prio_push(*(op->elem)); - op->status.store(SUCCEEDED, std::memory_order_release); - return true; - } - - void internal_pop(prio_operation *op) override { - // if empty or already reserved, don't pop - if ( this->my_reserved == true || this->my_tail == 0 ) { - op->status.store(FAILED, std::memory_order_release); - return; - } - - *(op->elem) = prio(); - op->status.store(SUCCEEDED, std::memory_order_release); - prio_pop(); - - } - - // pops the highest-priority item, saves copy - void internal_reserve(prio_operation *op) override { - if (this->my_reserved == true || this->my_tail == 0) { - op->status.store(FAILED, std::memory_order_release); - return; - } - this->my_reserved = true; - *(op->elem) = prio(); - reserved_item = *(op->elem); - op->status.store(SUCCEEDED, std::memory_order_release); - prio_pop(); - } - - void internal_consume(prio_operation *op) override { - op->status.store(SUCCEEDED, std::memory_order_release); - this->my_reserved = false; - reserved_item = input_type(); - } - - void internal_release(prio_operation *op) override { - op->status.store(SUCCEEDED, std::memory_order_release); - prio_push(reserved_item); - this->my_reserved = false; - reserved_item = input_type(); - } - -private: - template<typename> friend class buffer_node; - - void order() { - if (mark < this->my_tail) heapify(); - __TBB_ASSERT(mark == this->my_tail, "mark unequal after heapify"); - } - - bool is_item_valid() { - return this->my_tail > 0; - } - - void try_put_and_add_task(graph_task*& last_task) { - graph_task * new_task = this->my_successors.try_put_task(this->prio()); - if (new_task) { - // workaround for icc bug - graph& graph_ref = this->graph_reference(); - last_task = combine_tasks(graph_ref, last_task, new_task); - prio_pop(); - } - } - -private: - Compare compare; - size_type mark; - - input_type reserved_item; - - // in case a reheap has not been done after a push, check if the mark item is higher than the 0'th item - bool prio_use_tail() { - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds before test"); - return mark < this->my_tail && compare(this->get_my_item(0), this->get_my_item(this->my_tail - 1)); - } - - // prio_push: checks that the item will fit, expand array if necessary, put at end - void prio_push(const T &src) { - if ( this->my_tail >= this->my_array_size ) - this->grow_my_array( this->my_tail + 1 ); - (void) this->place_item(this->my_tail, src); - ++(this->my_tail); - __TBB_ASSERT(mark < this->my_tail, "mark outside bounds after push"); - } - - // prio_pop: deletes highest priority item from the array, and if it is item - // 0, move last item to 0 and reheap. If end of array, just destroy and decrement tail - // and mark. Assumes the array has already been tested for emptiness; no failure. 
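// The prio_pop/reheap pair below is a conventional binary-heap extraction over the
// node's flat item array. For reference, a minimal self-contained sketch of the same
// sift-down idea on a std::vector follows; heap_pop and its parameters are
// illustrative names only, not members of priority_queue_node.

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

template <typename T, typename Compare = std::less<T>>
T heap_pop(std::vector<T>& heap, Compare compare = Compare()) {
    // Precondition: heap is non-empty and ordered as a max-heap w.r.t. compare.
    T top = std::move(heap.front());
    if (heap.size() > 1)
        heap.front() = std::move(heap.back());    // move the last element to the root
    heap.pop_back();
    std::size_t cur = 0;
    while (true) {
        std::size_t child = 2 * cur + 1;
        if (child >= heap.size()) break;
        if (child + 1 < heap.size() && compare(heap[child], heap[child + 1]))
            ++child;                              // pick the higher-priority child
        if (!compare(heap[cur], heap[child]))
            break;                                // heap property is restored
        std::swap(heap[cur], heap[child]);
        cur = child;
    }
    return top;
}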
- void prio_pop() { - if (prio_use_tail()) { - // there are newly pushed elements; last one higher than top - // copy the data - this->destroy_item(this->my_tail-1); - --(this->my_tail); - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); - return; - } - this->destroy_item(0); - if(this->my_tail > 1) { - // push the last element down heap - __TBB_ASSERT(this->my_item_valid(this->my_tail - 1), NULL); - this->move_item(0,this->my_tail - 1); - } - --(this->my_tail); - if(mark > this->my_tail) --mark; - if (this->my_tail > 1) // don't reheap for heap of size 1 - reheap(); - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); - } - - const T& prio() { - return this->get_my_item(prio_use_tail() ? this->my_tail-1 : 0); - } - - // turn array into heap - void heapify() { - if(this->my_tail == 0) { - mark = 0; - return; - } - if (!mark) mark = 1; - for (; mark<this->my_tail; ++mark) { // for each unheaped element - size_type cur_pos = mark; - input_type to_place; - this->fetch_item(mark,to_place); - do { // push to_place up the heap - size_type parent = (cur_pos-1)>>1; - if (!compare(this->get_my_item(parent), to_place)) - break; - this->move_item(cur_pos, parent); - cur_pos = parent; - } while( cur_pos ); - (void) this->place_item(cur_pos, to_place); - } - } - - // otherwise heapified array with new root element; rearrange to heap - void reheap() { - size_type cur_pos=0, child=1; - while (child < mark) { - size_type target = child; - if (child+1<mark && - compare(this->get_my_item(child), - this->get_my_item(child+1))) - ++target; - // target now has the higher priority child - if (compare(this->get_my_item(target), - this->get_my_item(cur_pos))) - break; - // swap - this->swap_items(cur_pos, target); - cur_pos = target; - child = (cur_pos<<1)+1; - } - } -}; // priority_queue_node - -//! Forwards messages only if the threshold has not been reached -/** This node forwards items until its threshold is reached. - It contains no buffering. If the downstream node rejects, the - message is dropped. */ -template< typename T, typename DecrementType=continue_msg > -class limiter_node : public graph_node, public receiver< T >, public sender< T > { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - //TODO: There is a lack of predefined types for its controlling "decrementer" port. It should be fixed later. - -private: - size_t my_threshold; - size_t my_count; // number of successful puts - size_t my_tries; // number of active put attempts - reservable_predecessor_cache< T, spin_mutex > my_predecessors; - spin_mutex my_mutex; - broadcast_cache< T > my_successors; - - //! 
The internal receiver< DecrementType > that adjusts the count - threshold_regulator< limiter_node<T, DecrementType>, DecrementType > decrement; - - graph_task* decrement_counter( long long delta ) { - { - spin_mutex::scoped_lock lock(my_mutex); - if( delta > 0 && size_t(delta) > my_count ) - my_count = 0; - else if( delta < 0 && size_t(delta) > my_threshold - my_count ) - my_count = my_threshold; - else - my_count -= size_t(delta); // absolute value of delta is sufficiently small - } - return forward_task(); - } - - // Let threshold_regulator call decrement_counter() - friend class threshold_regulator< limiter_node<T, DecrementType>, DecrementType >; - - friend class forward_task_bypass< limiter_node<T,DecrementType> >; - - bool check_conditions() { // always called under lock - return ( my_count + my_tries < my_threshold && !my_predecessors.empty() && !my_successors.empty() ); - } - - // only returns a valid task pointer or NULL, never SUCCESSFULLY_ENQUEUED - graph_task* forward_task() { - input_type v; - graph_task* rval = NULL; - bool reserved = false; - { - spin_mutex::scoped_lock lock(my_mutex); - if ( check_conditions() ) - ++my_tries; - else - return NULL; - } - - //SUCCESS - // if we can reserve and can put, we consume the reservation - // we increment the count and decrement the tries - if ( (my_predecessors.try_reserve(v)) == true ){ - reserved=true; - if ( (rval = my_successors.try_put_task(v)) != NULL ){ - { - spin_mutex::scoped_lock lock(my_mutex); - ++my_count; - --my_tries; - my_predecessors.try_consume(); - if ( check_conditions() ) { - if ( is_graph_active(this->my_graph) ) { - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - small_object_allocator allocator{}; - graph_task* rtask = allocator.new_object<task_type>( my_graph, allocator, *this ); - my_graph.reserve_wait(); - spawn_in_graph_arena(graph_reference(), *rtask); - } - } - } - return rval; - } - } - //FAILURE - //if we can't reserve, we decrement the tries - //if we can reserve but can't put, we decrement the tries and release the reservation - { - spin_mutex::scoped_lock lock(my_mutex); - --my_tries; - if (reserved) my_predecessors.try_release(); - if ( check_conditions() ) { - if ( is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - __TBB_ASSERT(!rval, "Have two tasks to handle"); - return t; - } - } - return rval; - } - } - - void initialize() { - fgt_node( - CODEPTR(), FLOW_LIMITER_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<receiver<DecrementType> *>(&decrement), - static_cast<sender<output_type> *>(this) - ); - } - -public: - //! Constructor - limiter_node(graph &g, size_t threshold) - : graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), my_predecessors(this) - , my_successors(this), decrement(this) - { - initialize(); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - limiter_node(const node_set<Args...>& nodes, size_t threshold) - : limiter_node(nodes.graph_reference(), threshold) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - limiter_node( const limiter_node& src ) : limiter_node(src.my_graph, src.my_threshold) {} - - //! The interface for accessing internal receiver< DecrementType > that adjusts the count - receiver<DecrementType>& decrementer() { return decrement; } - - //! 
Replace the current successor with this new successor - bool register_successor( successor_type &r ) override { - spin_mutex::scoped_lock lock(my_mutex); - bool was_empty = my_successors.empty(); - my_successors.register_successor(r); - //spawn a forward task if this is the only successor - if ( was_empty && !my_predecessors.empty() && my_count + my_tries < my_threshold ) { - if ( is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - spawn_in_graph_arena(graph_reference(), *t); - } - } - return true; - } - - //! Removes a successor from this node - /** r.remove_predecessor(*this) is also called. */ - bool remove_successor( successor_type &r ) override { - // TODO revamp: investigate why qualification is needed for remove_predecessor() call - tbb::detail::d1::remove_predecessor(r, *this); - my_successors.remove_successor(r); - return true; - } - - //! Adds src to the list of cached predecessors. - bool register_predecessor( predecessor_type &src ) override { - spin_mutex::scoped_lock lock(my_mutex); - my_predecessors.add( src ); - if ( my_count + my_tries < my_threshold && !my_successors.empty() && is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - spawn_in_graph_arena(graph_reference(), *t); - } - return true; - } - - //! Removes src from the list of cached predecessors. - bool remove_predecessor( predecessor_type &src ) override { - my_predecessors.remove( src ); - return true; - } - -protected: - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - //! Puts an item to this receiver - graph_task* try_put_task( const T &t ) override { - { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_count + my_tries >= my_threshold ) - return NULL; - else - ++my_tries; - } - - graph_task* rtask = my_successors.try_put_task(t); - - if ( !rtask ) { // try_put_task failed. 
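                // The successor rejected the message: under the lock the speculative
                // ++my_tries from above is rolled back, and if the node can still
                // forward (count plus in-flight tries below the threshold, predecessor
                // and successor caches non-empty) and the graph is active, a
                // forward_task_bypass task is created and returned to the caller so
                // the attempt is repeated later.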
- spin_mutex::scoped_lock lock(my_mutex); - --my_tries; - if (check_conditions() && is_graph_active(this->my_graph)) { - small_object_allocator allocator{}; - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - rtask = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - } - } - else { - spin_mutex::scoped_lock lock(my_mutex); - ++my_count; - --my_tries; - } - return rtask; - } - - graph& graph_reference() const override { return my_graph; } - - void reset_node( reset_flags f) override { - my_count = 0; - if(f & rf_clear_edges) { - my_predecessors.clear(); - my_successors.clear(); - } - else - { - my_predecessors.reset( ); - } - decrement.reset_receiver(f); - } -}; // limiter_node - -#include "detail/_flow_graph_join_impl.h" - -template<typename OutputTuple, typename JP=queueing> class join_node; - -template<typename OutputTuple> -class join_node<OutputTuple,reserving>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, reserving_port, OutputTuple, reserving> { -private: - static const int N = std::tuple_size<OutputTuple>::value; - typedef unfolded_join_node<N, reserving_port, OutputTuple, reserving> unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, reserving = reserving()) : join_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -template<typename OutputTuple> -class join_node<OutputTuple,queueing>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, queueing_port, OutputTuple, queueing> { -private: - static const int N = std::tuple_size<OutputTuple>::value; - typedef unfolded_join_node<N, queueing_port, OutputTuple, queueing> unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, queueing = queueing()) : join_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -// template for key_matching join_node -// tag_matching join_node is a specialization of key_matching, and is source-compatible. 
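// For orientation, a minimal usage sketch of the queueing join_node defined above;
// the reserving form differs only in the policy tag, and the key_matching form
// declared next additionally takes one key-extractor body per input port. The
// function and variable names below are illustrative, not part of the library.

#include "oneapi/tbb/flow_graph.h"
#include <iostream>
#include <tuple>

void join_usage_sketch() {
    using namespace tbb::flow;
    graph g;
    function_node<int, int>   doubler(g, unlimited, [](int v) { return 2 * v; });
    function_node<int, float> halver (g, unlimited, [](int v) { return v / 2.0f; });
    join_node<std::tuple<int, float>, queueing> pair_up(g);   // waits for one item per port
    function_node<std::tuple<int, float>> printer(g, serial,
        [](const std::tuple<int, float>& t) {
            std::cout << std::get<0>(t) << " / " << std::get<1>(t) << std::endl;
            return continue_msg{};
        });
    make_edge(doubler, input_port<0>(pair_up));
    make_edge(halver,  input_port<1>(pair_up));
    make_edge(pair_up, printer);
    doubler.try_put(21);
    halver.try_put(21);
    g.wait_for_all();   // prints "42 / 10.5"
}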
-template<typename OutputTuple, typename K, typename KHash> -class join_node<OutputTuple, key_matching<K, KHash> > : public unfolded_join_node<std::tuple_size<OutputTuple>::value, - key_matching_port, OutputTuple, key_matching<K,KHash> > { -private: - static const int N = std::tuple_size<OutputTuple>::value; - typedef unfolded_join_node<N, key_matching_port, OutputTuple, key_matching<K,KHash> > unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - join_node(graph &g) : unfolded_type(g) {} -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - - template<typename __TBB_B0, typename __TBB_B1> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1) : unfolded_type(g, b0, b1) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2) : unfolded_type(g, b0, b1, b2) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3) : unfolded_type(g, b0, b1, b2, b3) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4) : - unfolded_type(g, b0, b1, b2, b3, b4) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#if __TBB_VARIADIC_MAX >= 6 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5) : - unfolded_type(g, b0, b1, b2, b3, b4, b5) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 7 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5, typename __TBB_B6> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6) : - unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 8 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5, typename __TBB_B6, typename __TBB_B7> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { - fgt_multiinput_node<N>( CODEPTR(), 
FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 9 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7, __TBB_B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 10 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8, typename __TBB_B9> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7, __TBB_B8 b8, __TBB_B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template < -#if (__clang_major__ == 3 && __clang_minor__ == 4) - // clang 3.4 misdeduces 'Args...' for 'node_set' while it can cope with template template parameter. - template<typename...> class node_set, -#endif - typename... Args, typename... Bodies - > - __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, Bodies... bodies) - : join_node(nodes.graph_reference(), bodies...) { - make_edges_in_order(nodes, *this); - } -#endif - - __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -// indexer node -#include "detail/_flow_graph_indexer_impl.h" - -// TODO: Implement interface with variadic template or tuple -template<typename T0, typename T1=null_type, typename T2=null_type, typename T3=null_type, - typename T4=null_type, typename T5=null_type, typename T6=null_type, - typename T7=null_type, typename T8=null_type, typename T9=null_type> class indexer_node; - -//indexer node specializations -template<typename T0> -class indexer_node<T0> : public unfolded_indexer_node<std::tuple<T0> > { -private: - static const int N = 1; -public: - typedef std::tuple<T0> InputTuple; - typedef tagged_msg<size_t, T0> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -}; - -template<typename T0, typename T1> -class indexer_node<T0, T1> : public unfolded_indexer_node<std::tuple<T0, T1> > { -private: - static const int N = 2; -public: - typedef std::tuple<T0, T1> InputTuple; - typedef tagged_msg<size_t, T0, T1> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -template<typename T0, typename T1, typename T2> -class indexer_node<T0, T1, T2> : public unfolded_indexer_node<std::tuple<T0, T1, T2> > { -private: - static const int N = 3; -public: - typedef std::tuple<T0, T1, T2> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -template<typename T0, typename T1, typename T2, typename T3> -class indexer_node<T0, T1, T2, T3> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3> > { -private: - static const int N = 4; -public: - typedef std::tuple<T0, T1, T2, T3> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -template<typename T0, typename T1, typename T2, typename T3, typename T4> -class indexer_node<T0, T1, T2, T3, T4> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4> > { -private: - static const int N = 5; -public: - typedef std::tuple<T0, T1, T2, T3, T4> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -#if __TBB_VARIADIC_MAX >= 6 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> -class indexer_node<T0, T1, T2, T3, T4, T5> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5> > { -private: - static const int N = 6; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 6 - -#if __TBB_VARIADIC_MAX >= 7 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, - typename T6> -class indexer_node<T0, T1, T2, T3, T4, T5, T6> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6> > { -private: - static const int N = 7; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5, T6> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 7 - -#if __TBB_VARIADIC_MAX >= 8 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, - typename T6, typename T7> -class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> > { -private: - static const int N = 8; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 8 - -#if __TBB_VARIADIC_MAX >= 9 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8> -class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7, T8> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> > { -private: - static const int N = 9; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 9 - -#if __TBB_VARIADIC_MAX >= 10 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, typename T9> -class indexer_node/*default*/ : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> > { -private: - static const int N = 10; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 10 - -template< typename T > -inline void internal_make_edge( sender<T> &p, receiver<T> &s ) { - register_successor(p, s); - fgt_make_edge( &p, &s ); -} - -//! Makes an edge between a single predecessor and a single successor -template< typename T > -inline void make_edge( sender<T> &p, receiver<T> &s ) { - internal_make_edge( p, s ); -} - -//Makes an edge from port 0 of a multi-output predecessor to port 0 of a multi-input successor. -template< typename T, typename V, - typename = typename T::output_ports_type, typename = typename V::input_ports_type > -inline void make_edge( T& output, V& input) { - make_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); -} - -//Makes an edge from port 0 of a multi-output predecessor to a receiver. -template< typename T, typename R, - typename = typename T::output_ports_type > -inline void make_edge( T& output, receiver<R>& input) { - make_edge(std::get<0>(output.output_ports()), input); -} - -//Makes an edge from a sender to port 0 of a multi-input successor. -template< typename S, typename V, - typename = typename V::input_ports_type > -inline void make_edge( sender<S>& output, V& input) { - make_edge(output, std::get<0>(input.input_ports())); -} - -template< typename T > -inline void internal_remove_edge( sender<T> &p, receiver<T> &s ) { - remove_successor( p, s ); - fgt_remove_edge( &p, &s ); -} - -//! Removes an edge between a single predecessor and a single successor -template< typename T > -inline void remove_edge( sender<T> &p, receiver<T> &s ) { - internal_remove_edge( p, s ); -} - -//Removes an edge between port 0 of a multi-output predecessor and port 0 of a multi-input successor. 
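// A small sketch of the edge helpers above (assuming oneapi/tbb/flow_graph.h is
// included): edges can be made and removed while the graph exists, and the port-0
// convenience overloads (such as the remove_edge form declared next) let multi-port
// nodes be wired without spelling out std::get. Names and values are illustrative.

void edge_usage_sketch() {
    using namespace tbb::flow;
    graph g;
    broadcast_node<int> source(g);
    function_node<int> sink(g, serial, [](int) { return continue_msg{}; });
    make_edge(source, sink);     // sender<int> -> receiver<int>
    source.try_put(1);           // forwarded to sink
    g.wait_for_all();
    remove_edge(source, sink);
    source.try_put(2);           // dropped: broadcast_node does not buffer
    g.wait_for_all();
}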
-template< typename T, typename V, - typename = typename T::output_ports_type, typename = typename V::input_ports_type > -inline void remove_edge( T& output, V& input) { - remove_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); -} - -//Removes an edge between port 0 of a multi-output predecessor and a receiver. -template< typename T, typename R, - typename = typename T::output_ports_type > -inline void remove_edge( T& output, receiver<R>& input) { - remove_edge(std::get<0>(output.output_ports()), input); -} -//Removes an edge between a sender and port 0 of a multi-input successor. -template< typename S, typename V, - typename = typename V::input_ports_type > -inline void remove_edge( sender<S>& output, V& input) { - remove_edge(output, std::get<0>(input.input_ports())); -} - -//! Returns a copy of the body from a function or continue node -template< typename Body, typename Node > -Body copy_body( Node &n ) { - return n.template copy_function_object<Body>(); -} - -//composite_node -template< typename InputTuple, typename OutputTuple > class composite_node; - -template< typename... InputTypes, typename... OutputTypes> -class composite_node <std::tuple<InputTypes...>, std::tuple<OutputTypes...> > : public graph_node { - -public: - typedef std::tuple< receiver<InputTypes>&... > input_ports_type; - typedef std::tuple< sender<OutputTypes>&... > output_ports_type; - -private: - std::unique_ptr<input_ports_type> my_input_ports; - std::unique_ptr<output_ports_type> my_output_ports; - - static const size_t NUM_INPUTS = sizeof...(InputTypes); - static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); - -protected: - void reset_node(reset_flags) override {} - -public: - composite_node( graph &g ) : graph_node(g) { - fgt_multiinput_multioutput_node( CODEPTR(), FLOW_COMPOSITE_NODE, this, &this->my_graph ); - } - - template<typename T1, typename T2> - void set_external_ports(T1&& input_ports_tuple, T2&& output_ports_tuple) { - static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); - static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); - - fgt_internal_input_alias_helper<T1, NUM_INPUTS>::alias_port( this, input_ports_tuple); - fgt_internal_output_alias_helper<T2, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); - - my_input_ports.reset( new input_ports_type(std::forward<T1>(input_ports_tuple)) ); - my_output_ports.reset( new output_ports_type(std::forward<T2>(output_ports_tuple)) ); - } - - template< typename... NodeTypes > - void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } - - template< typename... NodeTypes > - void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } - - - input_ports_type& input_ports() { - __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); - return *my_input_ports; - } - - output_ports_type& output_ports() { - __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); - return *my_output_ports; - } -}; // class composite_node - -//composite_node with only input ports -template< typename... InputTypes> -class composite_node <std::tuple<InputTypes...>, std::tuple<> > : public graph_node { -public: - typedef std::tuple< receiver<InputTypes>&... 
> input_ports_type; - -private: - std::unique_ptr<input_ports_type> my_input_ports; - static const size_t NUM_INPUTS = sizeof...(InputTypes); - -protected: - void reset_node(reset_flags) override {} - -public: - composite_node( graph &g ) : graph_node(g) { - fgt_composite( CODEPTR(), this, &g ); - } - - template<typename T> - void set_external_ports(T&& input_ports_tuple) { - static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); - - fgt_internal_input_alias_helper<T, NUM_INPUTS>::alias_port( this, input_ports_tuple); - - my_input_ports.reset( new input_ports_type(std::forward<T>(input_ports_tuple)) ); - } - - template< typename... NodeTypes > - void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } - - template< typename... NodeTypes > - void add_nodes( const NodeTypes&... n) { add_nodes_impl(this, false, n...); } - - - input_ports_type& input_ports() { - __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); - return *my_input_ports; - } - -}; // class composite_node - -//composite_nodes with only output_ports -template<typename... OutputTypes> -class composite_node <std::tuple<>, std::tuple<OutputTypes...> > : public graph_node { -public: - typedef std::tuple< sender<OutputTypes>&... > output_ports_type; - -private: - std::unique_ptr<output_ports_type> my_output_ports; - static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); - -protected: - void reset_node(reset_flags) override {} - -public: - __TBB_NOINLINE_SYM composite_node( graph &g ) : graph_node(g) { - fgt_composite( CODEPTR(), this, &g ); - } - - template<typename T> - void set_external_ports(T&& output_ports_tuple) { - static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); - - fgt_internal_output_alias_helper<T, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); - - my_output_ports.reset( new output_ports_type(std::forward<T>(output_ports_tuple)) ); - } - - template<typename... NodeTypes > - void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } - - template<typename... NodeTypes > - void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } - - - output_ports_type& output_ports() { - __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); - return *my_output_ports; - } - -}; // class composite_node - -template<typename Gateway> -class async_body_base: no_assign { -public: - typedef Gateway gateway_type; - - async_body_base(gateway_type *gateway): my_gateway(gateway) { } - void set_gateway(gateway_type *gateway) { - my_gateway = gateway; - } - -protected: - gateway_type *my_gateway; -}; - -template<typename Input, typename Ports, typename Gateway, typename Body> -class async_body: public async_body_base<Gateway> { -public: - typedef async_body_base<Gateway> base_type; - typedef Gateway gateway_type; - - async_body(const Body &body, gateway_type *gateway) - : base_type(gateway), my_body(body) { } - - void operator()( const Input &v, Ports & ) { - my_body(v, *this->my_gateway); - } - - Body get_body() { return my_body; } - -private: - Body my_body; -}; - -//! 
Implements async node -template < typename Input, typename Output, - typename Policy = queueing_lightweight > -class async_node - : public multifunction_node< Input, std::tuple< Output >, Policy >, public sender< Output > -{ - typedef multifunction_node< Input, std::tuple< Output >, Policy > base_type; - typedef multifunction_input< - Input, typename base_type::output_ports_type, Policy, cache_aligned_allocator<Input>> mfn_input_type; - -public: - typedef Input input_type; - typedef Output output_type; - typedef receiver<input_type> receiver_type; - typedef receiver<output_type> successor_type; - typedef sender<input_type> predecessor_type; - typedef receiver_gateway<output_type> gateway_type; - typedef async_body_base<gateway_type> async_body_base_type; - typedef typename base_type::output_ports_type output_ports_type; - -private: - class receiver_gateway_impl: public receiver_gateway<Output> { - public: - receiver_gateway_impl(async_node* node): my_node(node) {} - void reserve_wait() override { - fgt_async_reserve(static_cast<typename async_node::receiver_type *>(my_node), &my_node->my_graph); - my_node->my_graph.reserve_wait(); - } - - void release_wait() override { - async_node* n = my_node; - graph* g = &n->my_graph; - g->release_wait(); - fgt_async_commit(static_cast<typename async_node::receiver_type *>(n), g); - } - - //! Implements gateway_type::try_put for an external activity to submit a message to FG - bool try_put(const Output &i) override { - return my_node->try_put_impl(i); - } - - private: - async_node* my_node; - } my_gateway; - - //The substitute of 'this' for member construction, to prevent compiler warnings - async_node* self() { return this; } - - //! Implements gateway_type::try_put for an external activity to submit a message to FG - bool try_put_impl(const Output &i) { - multifunction_output<Output> &port_0 = output_port<0>(*this); - broadcast_cache<output_type>& port_successors = port_0.successors(); - fgt_async_try_put_begin(this, &port_0); - // TODO revamp: change to std::list<graph_task*> - graph_task_list tasks; - bool is_at_least_one_put_successful = port_successors.gather_successful_try_puts(i, tasks); - __TBB_ASSERT( is_at_least_one_put_successful || tasks.empty(), - "Return status is inconsistent with the method operation." ); - - while( !tasks.empty() ) { - enqueue_in_graph_arena(this->my_graph, tasks.pop_front()); - } - fgt_async_try_put_end(this, &port_0); - return is_at_least_one_put_successful; - } - -public: - template<typename Body> - __TBB_NOINLINE_SYM async_node( - graph &g, size_t concurrency, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority - ) : base_type( - g, concurrency, - async_body<Input, typename base_type::output_ports_type, gateway_type, Body> - (body, &my_gateway), a_priority ), my_gateway(self()) { - fgt_multioutput_node_with_body<1>( - CODEPTR(), FLOW_ASYNC_NODE, - &this->my_graph, static_cast<receiver<input_type> *>(this), - this->output_ports(), this->my_body - ); - } - - template <typename Body, typename... Args> - __TBB_NOINLINE_SYM async_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) - : async_node(g, concurrency, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... 
Args> - __TBB_NOINLINE_SYM async_node( - const node_set<Args...>& nodes, size_t concurrency, Body body, - Policy = Policy(), node_priority_t a_priority = no_priority ) - : async_node(nodes.graph_reference(), concurrency, body, a_priority) { - make_edges_in_order(nodes, *this); - } - - template <typename Body, typename... Args> - __TBB_NOINLINE_SYM async_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) - : async_node(nodes, concurrency, body, Policy(), a_priority) {} -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - - __TBB_NOINLINE_SYM async_node( const async_node &other ) : base_type(other), sender<Output>(), my_gateway(self()) { - static_cast<async_body_base_type*>(this->my_body->get_body_ptr())->set_gateway(&my_gateway); - static_cast<async_body_base_type*>(this->my_init_body->get_body_ptr())->set_gateway(&my_gateway); - - fgt_multioutput_node_with_body<1>( CODEPTR(), FLOW_ASYNC_NODE, - &this->my_graph, static_cast<receiver<input_type> *>(this), - this->output_ports(), this->my_body ); - } - - gateway_type& gateway() { - return my_gateway; - } - - // Define sender< Output > - - //! Add a new successor to this node - bool register_successor(successor_type&) override { - __TBB_ASSERT(false, "Successors must be registered only via ports"); - return false; - } - - //! Removes a successor from this node - bool remove_successor(successor_type&) override { - __TBB_ASSERT(false, "Successors must be removed only via ports"); - return false; - } - - template<typename Body> - Body copy_function_object() { - typedef multifunction_body<input_type, typename base_type::output_ports_type> mfn_body_type; - typedef async_body<Input, typename base_type::output_ports_type, gateway_type, Body> async_body_type; - mfn_body_type &body_ref = *this->my_body; - async_body_type ab = *static_cast<async_body_type*>(dynamic_cast< multifunction_body_leaf<input_type, typename base_type::output_ports_type, async_body_type> & >(body_ref).get_body_ptr()); - return ab.get_body(); - } - -protected: - - void reset_node( reset_flags f) override { - base_type::reset_node(f); - } -}; - -#include "detail/_flow_graph_node_set_impl.h" - -template< typename T > -class overwrite_node : public graph_node, public receiver<T>, public sender<T> { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - __TBB_NOINLINE_SYM explicit overwrite_node(graph &g) - : graph_node(g), my_successors(this), my_buffer_is_valid(false) - { - fgt_node( CODEPTR(), FLOW_OVERWRITE_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - overwrite_node(const node_set<Args...>& nodes) : overwrite_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor; doesn't take anything from src; default won't work - __TBB_NOINLINE_SYM overwrite_node( const overwrite_node& src ) : overwrite_node(src.my_graph) {} - - ~overwrite_node() {} - - bool register_successor( successor_type &s ) override { - spin_mutex::scoped_lock l( my_mutex ); - if (my_buffer_is_valid && is_graph_active( my_graph )) { - // We have a valid value that must be forwarded immediately. 
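            // Offer the buffered value to the newly connected successor right away; if
            // the put is rejected (e.g. the successor currently pulls via reservation),
            // the register_predecessor_task created below finishes the registration
            // asynchronously instead of spinning here.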
- bool ret = s.try_put( my_buffer ); - if ( ret ) { - // We add the successor that accepted our put - my_successors.register_successor( s ); - } else { - // In case of reservation a race between the moment of reservation and register_successor can appear, - // because failed reserve does not mean that register_successor is not ready to put a message immediately. - // We have some sort of infinite loop: reserving node tries to set pull state for the edge, - // but overwrite_node tries to return push state back. That is why we have to break this loop with task creation. - small_object_allocator allocator{}; - typedef register_predecessor_task task_type; - graph_task* t = allocator.new_object<task_type>(graph_reference(), allocator, *this, s); - graph_reference().reserve_wait(); - spawn_in_graph_arena( my_graph, *t ); - } - } else { - // No valid value yet, just add as successor - my_successors.register_successor( s ); - } - return true; - } - - bool remove_successor( successor_type &s ) override { - spin_mutex::scoped_lock l( my_mutex ); - my_successors.remove_successor(s); - return true; - } - - bool try_get( input_type &v ) override { - spin_mutex::scoped_lock l( my_mutex ); - if ( my_buffer_is_valid ) { - v = my_buffer; - return true; - } - return false; - } - - //! Reserves an item - bool try_reserve( T &v ) override { - return try_get(v); - } - - //! Releases the reserved item - bool try_release() override { return true; } - - //! Consumes the reserved item - bool try_consume() override { return true; } - - bool is_valid() { - spin_mutex::scoped_lock l( my_mutex ); - return my_buffer_is_valid; - } - - void clear() { - spin_mutex::scoped_lock l( my_mutex ); - my_buffer_is_valid = false; - } - -protected: - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task( const input_type &v ) override { - spin_mutex::scoped_lock l( my_mutex ); - return try_put_task_impl(v); - } - - graph_task * try_put_task_impl(const input_type &v) { - my_buffer = v; - my_buffer_is_valid = true; - graph_task* rtask = my_successors.try_put_task(v); - if (!rtask) rtask = SUCCESSFULLY_ENQUEUED; - return rtask; - } - - graph& graph_reference() const override { - return my_graph; - } - - //! 
Breaks an infinite loop between the node reservation and register_successor call - struct register_predecessor_task : public graph_task { - register_predecessor_task( - graph& g, small_object_allocator& allocator, predecessor_type& owner, successor_type& succ) - : graph_task(g, allocator), o(owner), s(succ) {}; - - task* execute(execution_data& ed) override { - // TODO revamp: investigate why qualification is needed for register_successor() call - using tbb::detail::d1::register_predecessor; - using tbb::detail::d1::register_successor; - if ( !register_predecessor(s, o) ) { - register_successor(o, s); - } - finalize(ed); - return nullptr; - } - - predecessor_type& o; - successor_type& s; - }; - - spin_mutex my_mutex; - broadcast_cache< input_type, null_rw_mutex > my_successors; - input_type my_buffer; - bool my_buffer_is_valid; - - void reset_node( reset_flags f) override { - my_buffer_is_valid = false; - if (f&rf_clear_edges) { - my_successors.clear(); - } - } -}; // overwrite_node - -template< typename T > -class write_once_node : public overwrite_node<T> { -public: - typedef T input_type; - typedef T output_type; - typedef overwrite_node<T> base_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - //! Constructor - __TBB_NOINLINE_SYM explicit write_once_node(graph& g) : base_type(g) { - fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - write_once_node(const node_set<Args...>& nodes) : write_once_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor: call base class copy constructor - __TBB_NOINLINE_SYM write_once_node( const write_once_node& src ) : base_type(src) { - fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task *try_put_task( const T &v ) override { - spin_mutex::scoped_lock l( this->my_mutex ); - return this->my_buffer_is_valid ? 
NULL : this->try_put_task_impl(v); - } -}; // write_once_node - -inline void set_name(const graph& g, const char *name) { - fgt_graph_desc(&g, name); -} - -template <typename Output> -inline void set_name(const input_node<Output>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename Input, typename Output, typename Policy> -inline void set_name(const function_node<Input, Output, Policy>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename Output, typename Policy> -inline void set_name(const continue_node<Output,Policy>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const broadcast_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const buffer_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const queue_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const sequencer_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T, typename Compare> -inline void set_name(const priority_queue_node<T, Compare>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T, typename DecrementType> -inline void set_name(const limiter_node<T, DecrementType>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename OutputTuple, typename JP> -inline void set_name(const join_node<OutputTuple, JP>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename... Types> -inline void set_name(const indexer_node<Types...>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const overwrite_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const write_once_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template<typename Input, typename Output, typename Policy> -inline void set_name(const multifunction_node<Input, Output, Policy>& node, const char *name) { - fgt_multioutput_node_desc(&node, name); -} - -template<typename TupleType> -inline void set_name(const split_node<TupleType>& node, const char *name) { - fgt_multioutput_node_desc(&node, name); -} - -template< typename InputTuple, typename OutputTuple > -inline void set_name(const composite_node<InputTuple, OutputTuple>& node, const char *name) { - fgt_multiinput_multioutput_node_desc(&node, name); -} - -template<typename Input, typename Output, typename Policy> -inline void set_name(const async_node<Input, Output, Policy>& node, const char *name) -{ - fgt_multioutput_node_desc(&node, name); -} -} // d1 -} // detail -} // tbb - - -// Include deduction guides for node classes -#include "detail/_flow_graph_nodes_deduction.h" - -namespace tbb { -namespace flow { -inline namespace v1 { - using detail::d1::receiver; - using detail::d1::sender; - - using detail::d1::serial; - using detail::d1::unlimited; - - using detail::d1::reset_flags; - using detail::d1::rf_reset_protocol; - using detail::d1::rf_reset_bodies; - using detail::d1::rf_clear_edges; - - using detail::d1::graph; - using detail::d1::graph_node; - using detail::d1::continue_msg; - - using detail::d1::input_node; - using detail::d1::function_node; - using detail::d1::multifunction_node; - using detail::d1::split_node; - using detail::d1::output_port; - 
using detail::d1::indexer_node; - using detail::d1::tagged_msg; - using detail::d1::cast_to; - using detail::d1::is_a; - using detail::d1::continue_node; - using detail::d1::overwrite_node; - using detail::d1::write_once_node; - using detail::d1::broadcast_node; - using detail::d1::buffer_node; - using detail::d1::queue_node; - using detail::d1::sequencer_node; - using detail::d1::priority_queue_node; - using detail::d1::limiter_node; - using namespace detail::d1::graph_policy_namespace; - using detail::d1::join_node; - using detail::d1::input_port; - using detail::d1::copy_body; - using detail::d1::make_edge; - using detail::d1::remove_edge; - using detail::d1::tag_value; - using detail::d1::composite_node; - using detail::d1::async_node; - using detail::d1::node_priority_t; - using detail::d1::no_priority; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - using detail::d1::follows; - using detail::d1::precedes; - using detail::d1::make_node_set; - using detail::d1::make_edges; -#endif - -} // v1 -} // flow - - using detail::d1::flow_control; - -namespace profiling { - using detail::d1::set_name; -} // profiling - -} // tbb - - -#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) - // We don't do pragma pop here, since it still gives warning on the USER side - #undef __TBB_NOINLINE_SYM -#endif - -#endif // __TBB_flow_graph_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_H +#define __TBB_flow_graph_H + +#include <atomic> +#include <memory> +#include <type_traits> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "spin_mutex.h" +#include "null_mutex.h" +#include "spin_rw_mutex.h" +#include "null_rw_mutex.h" +#include "detail/_pipeline_filters.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" +#include "cache_aligned_allocator.h" +#include "detail/_exception.h" +#include "detail/_template_helpers.h" +#include "detail/_aggregator.h" +#include "detail/_allocator_traits.h" +#include "profiling.h" +#include "task_arena.h" + +#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) + #if __INTEL_COMPILER + // Disabled warning "routine is both inline and noinline" + #pragma warning (push) + #pragma warning( disable: 2196 ) + #endif + #define __TBB_NOINLINE_SYM __attribute__((noinline)) +#else + #define __TBB_NOINLINE_SYM +#endif + +#include <tuple> +#include <list> +#include <queue> + +/** @file + \brief The graph related classes and functions + + There are some applications that best express dependencies as messages + passed between nodes in a graph. These messages may contain data or + simply act as signals that a predecessors has completed. The graph + class and its associated node classes can be used to express such + applications. +*/ + +namespace tbb { +namespace detail { + +namespace d1 { + +//! An enumeration the provides the two most common concurrency levels: unlimited and serial +enum concurrency { unlimited = 0, serial = 1 }; + +//! 
A generic null type +struct null_type {}; + +//! An empty class used for messages that mean "I'm done" +class continue_msg {}; + +//! Forward declaration section +template< typename T > class sender; +template< typename T > class receiver; +class continue_receiver; + +template< typename T, typename U > class limiter_node; // needed for resetting decrementer + +template<typename T, typename M> class successor_cache; +template<typename T, typename M> class broadcast_cache; +template<typename T, typename M> class round_robin_cache; +template<typename T, typename M> class predecessor_cache; +template<typename T, typename M> class reservable_predecessor_cache; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +namespace order { +struct following; +struct preceding; +} +template<typename Order, typename... Args> struct node_set; +#endif + + +} // namespace d1 +} // namespace detail +} // namespace tbb + +//! The graph class +#include "detail/_flow_graph_impl.h" + +namespace tbb { +namespace detail { +namespace d1 { + +static inline std::pair<graph_task*, graph_task*> order_tasks(graph_task* first, graph_task* second) { + if (second->priority > first->priority) + return std::make_pair(second, first); + return std::make_pair(first, second); +} + +// submit task if necessary. Returns the non-enqueued task if there is one. +static inline graph_task* combine_tasks(graph& g, graph_task* left, graph_task* right) { + // if no RHS task, don't change left. + if (right == NULL) return left; + // right != NULL + if (left == NULL) return right; + if (left == SUCCESSFULLY_ENQUEUED) return right; + // left contains a task + if (right != SUCCESSFULLY_ENQUEUED) { + // both are valid tasks + auto tasks_pair = order_tasks(left, right); + spawn_in_graph_arena(g, *tasks_pair.first); + return tasks_pair.second; + } + return left; +} + +//! Pure virtual template class that defines a sender of messages of type T +template< typename T > +class sender { +public: + virtual ~sender() {} + + //! Request an item from the sender + virtual bool try_get( T & ) { return false; } + + //! Reserves an item in the sender + virtual bool try_reserve( T & ) { return false; } + + //! Releases the reserved item + virtual bool try_release( ) { return false; } + + //! Consumes the reserved item + virtual bool try_consume( ) { return false; } + +protected: + //! The output type of this sender + typedef T output_type; + + //! The successor type for this node + typedef receiver<T> successor_type; + + //! Add a new successor to this node + virtual bool register_successor( successor_type &r ) = 0; + + //! Removes a successor from this node + virtual bool remove_successor( successor_type &r ) = 0; + + template<typename C> + friend bool register_successor(sender<C>& s, receiver<C>& r); + + template<typename C> + friend bool remove_successor (sender<C>& s, receiver<C>& r); +}; // class sender<T> + +template<typename C> +bool register_successor(sender<C>& s, receiver<C>& r) { + return s.register_successor(r); +} + +template<typename C> +bool remove_successor(sender<C>& s, receiver<C>& r) { + return s.remove_successor(r); +} + +//! Pure virtual template class that defines a receiver of messages of type T +template< typename T > +class receiver { +public: + //! Destructor + virtual ~receiver() {} + + //! Put an item to the receiver + bool try_put( const T& t ) { + graph_task *res = try_put_task(t); + if (!res) return false; + if (res != SUCCESSFULLY_ENQUEUED) spawn_in_graph_arena(graph_reference(), *res); + return true; + } + + //! 
put item to successor; return task to run the successor if possible. +protected: + //! The input type of this receiver + typedef T input_type; + + //! The predecessor type for this node + typedef sender<T> predecessor_type; + + template< typename R, typename B > friend class run_and_put_task; + template< typename X, typename Y > friend class broadcast_cache; + template< typename X, typename Y > friend class round_robin_cache; + virtual graph_task *try_put_task(const T& t) = 0; + virtual graph& graph_reference() const = 0; + + template<typename TT, typename M> friend class successor_cache; + virtual bool is_continue_receiver() { return false; } + + // TODO revamp: reconsider the inheritance and move node priority out of receiver + virtual node_priority_t priority() const { return no_priority; } + + //! Add a predecessor to the node + virtual bool register_predecessor( predecessor_type & ) { return false; } + + //! Remove a predecessor from the node + virtual bool remove_predecessor( predecessor_type & ) { return false; } + + template <typename C> + friend bool register_predecessor(receiver<C>& r, sender<C>& s); + template <typename C> + friend bool remove_predecessor (receiver<C>& r, sender<C>& s); +}; // class receiver<T> + +template <typename C> +bool register_predecessor(receiver<C>& r, sender<C>& s) { + return r.register_predecessor(s); +} + +template <typename C> +bool remove_predecessor(receiver<C>& r, sender<C>& s) { + return r.remove_predecessor(s); +} + +//! Base class for receivers of completion messages +/** These receivers automatically reset, but cannot be explicitly waited on */ +class continue_receiver : public receiver< continue_msg > { +protected: + + //! Constructor + explicit continue_receiver( int number_of_predecessors, node_priority_t a_priority ) { + my_predecessor_count = my_initial_predecessor_count = number_of_predecessors; + my_current_count = 0; + my_priority = a_priority; + } + + //! Copy constructor + continue_receiver( const continue_receiver& src ) : receiver<continue_msg>() { + my_predecessor_count = my_initial_predecessor_count = src.my_initial_predecessor_count; + my_current_count = 0; + my_priority = src.my_priority; + } + + //! Increments the trigger threshold + bool register_predecessor( predecessor_type & ) override { + spin_mutex::scoped_lock l(my_mutex); + ++my_predecessor_count; + return true; + } + + //! Decrements the trigger threshold + /** Does not check to see if the removal of the predecessor now makes the current count + exceed the new threshold. So removing a predecessor while the graph is active can cause + unexpected results. */ + bool remove_predecessor( predecessor_type & ) override { + spin_mutex::scoped_lock l(my_mutex); + --my_predecessor_count; + return true; + } + + //! The input type + typedef continue_msg input_type; + + //! The predecessor type for this node + typedef receiver<input_type>::predecessor_type predecessor_type; + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + // execute body is supposed to be too small to create a task for. + graph_task* try_put_task( const input_type & ) override { + { + spin_mutex::scoped_lock l(my_mutex); + if ( ++my_current_count < my_predecessor_count ) + return SUCCESSFULLY_ENQUEUED; + else + my_current_count = 0; + } + graph_task* res = execute(); + return res? 
res : SUCCESSFULLY_ENQUEUED; + } + + spin_mutex my_mutex; + int my_predecessor_count; + int my_current_count; + int my_initial_predecessor_count; + node_priority_t my_priority; + // the friend declaration in the base class did not eliminate the "protected class" + // error in gcc 4.1.2 + template<typename U, typename V> friend class limiter_node; + + virtual void reset_receiver( reset_flags f ) { + my_current_count = 0; + if (f & rf_clear_edges) { + my_predecessor_count = my_initial_predecessor_count; + } + } + + //! Does whatever should happen when the threshold is reached + /** This should be very fast or else spawn a task. This is + called while the sender is blocked in the try_put(). */ + virtual graph_task* execute() = 0; + template<typename TT, typename M> friend class successor_cache; + bool is_continue_receiver() override { return true; } + + node_priority_t priority() const override { return my_priority; } +}; // class continue_receiver + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + template <typename K, typename T> + K key_from_message( const T &t ) { + return t.key(); + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + +} // d1 +} // detail +} // tbb + +#include "detail/_flow_graph_trace_impl.h" +#include "detail/_hash_compare.h" + +namespace tbb { +namespace detail { +namespace d1 { + +#include "detail/_flow_graph_body_impl.h" +#include "detail/_flow_graph_cache_impl.h" +#include "detail/_flow_graph_types_impl.h" + +using namespace graph_policy_namespace; + +template <typename C, typename N> +graph_iterator<C,N>::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL) +{ + if (begin) current_node = my_graph->my_nodes; + //else it is an end iterator by default +} + +template <typename C, typename N> +typename graph_iterator<C,N>::reference graph_iterator<C,N>::operator*() const { + __TBB_ASSERT(current_node, "graph_iterator at end"); + return *operator->(); +} + +template <typename C, typename N> +typename graph_iterator<C,N>::pointer graph_iterator<C,N>::operator->() const { + return current_node; +} + +template <typename C, typename N> +void graph_iterator<C,N>::internal_forward() { + if (current_node) current_node = current_node->next; +} + +//! 
Constructs a graph with isolated task_group_context +inline graph::graph() : my_wait_context(0), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { + prepare_task_arena(); + own_context = true; + cancelled = false; + caught_exception = false; + my_context = new (r1::cache_aligned_allocate(sizeof(task_group_context))) task_group_context(FLOW_TASKS); + fgt_graph(this); + my_is_active = true; +} + +inline graph::graph(task_group_context& use_this_context) : + my_wait_context(0), my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { + prepare_task_arena(); + own_context = false; + cancelled = false; + caught_exception = false; + fgt_graph(this); + my_is_active = true; +} + +inline graph::~graph() { + wait_for_all(); + if (own_context) { + my_context->~task_group_context(); + r1::cache_aligned_deallocate(my_context); + } + delete my_task_arena; +} + +inline void graph::reserve_wait() { + my_wait_context.reserve(); + fgt_reserve_wait(this); +} + +inline void graph::release_wait() { + fgt_release_wait(this); + my_wait_context.release(); +} + +inline void graph::register_node(graph_node *n) { + n->next = NULL; + { + spin_mutex::scoped_lock lock(nodelist_mutex); + n->prev = my_nodes_last; + if (my_nodes_last) my_nodes_last->next = n; + my_nodes_last = n; + if (!my_nodes) my_nodes = n; + } +} + +inline void graph::remove_node(graph_node *n) { + { + spin_mutex::scoped_lock lock(nodelist_mutex); + __TBB_ASSERT(my_nodes && my_nodes_last, "graph::remove_node: Error: no registered nodes"); + if (n->prev) n->prev->next = n->next; + if (n->next) n->next->prev = n->prev; + if (my_nodes_last == n) my_nodes_last = n->prev; + if (my_nodes == n) my_nodes = n->next; + } + n->prev = n->next = NULL; +} + +inline void graph::reset( reset_flags f ) { + // reset context + deactivate_graph(*this); + + my_context->reset(); + cancelled = false; + caught_exception = false; + // reset all the nodes comprising the graph + for(iterator ii = begin(); ii != end(); ++ii) { + graph_node *my_p = &(*ii); + my_p->reset_node(f); + } + // Reattach the arena. Might be useful to run the graph in a particular task_arena + // while not limiting graph lifetime to a single task_arena::execute() call. + prepare_task_arena( /*reinit=*/true ); + activate_graph(*this); +} + +inline void graph::cancel() { + my_context->cancel_group_execution(); +} + +inline graph::iterator graph::begin() { return iterator(this, true); } + +inline graph::iterator graph::end() { return iterator(this, false); } + +inline graph::const_iterator graph::begin() const { return const_iterator(this, true); } + +inline graph::const_iterator graph::end() const { return const_iterator(this, false); } + +inline graph::const_iterator graph::cbegin() const { return const_iterator(this, true); } + +inline graph::const_iterator graph::cend() const { return const_iterator(this, false); } + +inline graph_node::graph_node(graph& g) : my_graph(g) { + my_graph.register_node(this); +} + +inline graph_node::~graph_node() { + my_graph.remove_node(this); +} + +#include "detail/_flow_graph_node_impl.h" + + +//! An executable node that acts as a source, i.e. it has no predecessors + +template < typename Output > +class input_node : public graph_node, public sender< Output > { +public: + //! The type of the output message, which is complete + typedef Output output_type; + + //! 
The type of successors of this node + typedef typename sender<output_type>::successor_type successor_type; + + // Input node has no input type + typedef null_type input_type; + + //! Constructor for a node with a successor + template< typename Body > + __TBB_NOINLINE_SYM input_node( graph &g, Body body ) + : graph_node(g), my_active(false) + , my_body( new input_body_leaf< output_type, Body>(body) ) + , my_init_body( new input_body_leaf< output_type, Body>(body) ) + , my_successors(this), my_reserved(false), my_has_cached_item(false) + { + fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, + static_cast<sender<output_type> *>(this), this->my_body); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Successors> + input_node( const node_set<order::preceding, Successors...>& successors, Body body ) + : input_node(successors.graph_reference(), body) + { + make_edges(*this, successors); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM input_node( const input_node& src ) + : graph_node(src.my_graph), sender<Output>() + , my_active(false) + , my_body(src.my_init_body->clone()), my_init_body(src.my_init_body->clone()) + , my_successors(this), my_reserved(false), my_has_cached_item(false) + { + fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, + static_cast<sender<output_type> *>(this), this->my_body); + } + + //! The destructor + ~input_node() { delete my_body; delete my_init_body; } + + //! Add a new successor to this node + bool register_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_successors.register_successor(r); + if ( my_active ) + spawn_put(); + return true; + } + + //! Removes a successor from this node + bool remove_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_successors.remove_successor(r); + return true; + } + + //! Request an item from the node + bool try_get( output_type &v ) override { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) + return false; + + if ( my_has_cached_item ) { + v = my_cached_item; + my_has_cached_item = false; + return true; + } + // we've been asked to provide an item, but we have none. enqueue a task to + // provide one. + if ( my_active ) + spawn_put(); + return false; + } + + //! Reserves an item. + bool try_reserve( output_type &v ) override { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) { + return false; + } + + if ( my_has_cached_item ) { + v = my_cached_item; + my_reserved = true; + return true; + } else { + return false; + } + } + + //! Release a reserved item. + /** true = item has been released and so remains in sender, dest must request or reserve future items */ + bool try_release( ) override { + spin_mutex::scoped_lock lock(my_mutex); + __TBB_ASSERT( my_reserved && my_has_cached_item, "releasing non-existent reservation" ); + my_reserved = false; + if(!my_successors.empty()) + spawn_put(); + return true; + } + + //! Consumes a reserved item + bool try_consume( ) override { + spin_mutex::scoped_lock lock(my_mutex); + __TBB_ASSERT( my_reserved && my_has_cached_item, "consuming non-existent reservation" ); + my_reserved = false; + my_has_cached_item = false; + if ( !my_successors.empty() ) { + spawn_put(); + } + return true; + } + + //! 
Activates a node that was created in the inactive state + void activate() { + spin_mutex::scoped_lock lock(my_mutex); + my_active = true; + if (!my_successors.empty()) + spawn_put(); + } + + template<typename Body> + Body copy_function_object() { + input_body<output_type> &body_ref = *this->my_body; + return dynamic_cast< input_body_leaf<output_type, Body> & >(body_ref).get_body(); + } + +protected: + + //! resets the input_node to its initial state + void reset_node( reset_flags f) override { + my_active = false; + my_reserved = false; + my_has_cached_item = false; + + if(f & rf_clear_edges) my_successors.clear(); + if(f & rf_reset_bodies) { + input_body<output_type> *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + +private: + spin_mutex my_mutex; + bool my_active; + input_body<output_type> *my_body; + input_body<output_type> *my_init_body; + broadcast_cache< output_type > my_successors; + bool my_reserved; + bool my_has_cached_item; + output_type my_cached_item; + + // used by apply_body_bypass, can invoke body of node. + bool try_reserve_apply_body(output_type &v) { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) { + return false; + } + if ( !my_has_cached_item ) { + flow_control control; + + fgt_begin_body( my_body ); + + my_cached_item = (*my_body)(control); + my_has_cached_item = !control.is_pipeline_stopped; + + fgt_end_body( my_body ); + } + if ( my_has_cached_item ) { + v = my_cached_item; + my_reserved = true; + return true; + } else { + return false; + } + } + + graph_task* create_put_task() { + small_object_allocator allocator{}; + typedef input_node_task_bypass< input_node<output_type> > task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + return t; + } + + //! Spawns a task that applies the body + void spawn_put( ) { + if(is_graph_active(this->my_graph)) { + spawn_in_graph_arena(this->my_graph, *create_put_task()); + } + } + + friend class input_node_task_bypass< input_node<output_type> >; + //! Applies the body. Returning SUCCESSFULLY_ENQUEUED okay; forward_task_bypass will handle it. + graph_task* apply_body_bypass( ) { + output_type v; + if ( !try_reserve_apply_body(v) ) + return NULL; + + graph_task *last_task = my_successors.try_put_task(v); + if ( last_task ) + try_consume(); + else + try_release(); + return last_task; + } +}; // class input_node + +//! Implements a function node that supports Input -> Output +template<typename Input, typename Output = continue_msg, typename Policy = queueing> +class function_node + : public graph_node + , public function_input< Input, Output, Policy, cache_aligned_allocator<Input> > + , public function_output<Output> +{ + typedef cache_aligned_allocator<Input> internals_allocator; + +public: + typedef Input input_type; + typedef Output output_type; + typedef function_input<input_type,output_type,Policy,internals_allocator> input_impl_type; + typedef function_input_queue<input_type, internals_allocator> input_queue_type; + typedef function_output<output_type> fOutput_type; + typedef typename input_impl_type::predecessor_type predecessor_type; + typedef typename fOutput_type::successor_type successor_type; + + using input_impl_type::my_predecessors; + + //! Constructor + // input_queue_type is allocated here, but destroyed in the function_input_base. + // TODO: pass the graph_buffer_policy to the function_input_base so it can all + // be done in one place. This would be an interface-breaking change. 
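    // A minimal usage sketch for the constructor that follows (illustrative only; the graph,
    // lambda, and values below are example assumptions, not part of this header):
    //
    //     tbb::flow::graph g;
    //     tbb::flow::function_node<int, int> squarer(
    //         g, tbb::flow::unlimited, [](int v) { return v * v; });
    //     squarer.try_put(3);
    //     g.wait_for_all();
    //
    // The Body is copied into the node; the concurrency argument (serial, unlimited, or an
    // explicit limit) bounds how many copies of the body may execute at the same time.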
+ template< typename Body > + __TBB_NOINLINE_SYM function_node( graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) + : graph_node(g), input_impl_type(g, concurrency, body, a_priority), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + function_node( graph& g, size_t concurrency, Body body, node_priority_t a_priority ) + : function_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority ) + : function_node(nodes.graph_reference(), concurrency, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority ) + : function_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + //! Copy constructor + __TBB_NOINLINE_SYM function_node( const function_node& src ) : + graph_node(src.my_graph), + input_impl_type(src), + fOutput_type(src.my_graph) { + fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + using input_impl_type::try_put_task; + + broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } + + void reset_node(reset_flags f) override { + input_impl_type::reset_function_input(f); + // TODO: use clear() instead. + if(f & rf_clear_edges) { + successors().clear(); + my_predecessors.clear(); + } + __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "function_node successors not empty"); + __TBB_ASSERT(this->my_predecessors.empty(), "function_node predecessors not empty"); + } + +}; // class function_node + +//! implements a function node that supports Input -> (set of outputs) +// Output is a tuple of output types. 
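// A hedged usage sketch (the node name, lambda, and routing rule below are illustrative
// assumptions): a multifunction_node receives one input and may put to any subset of its
// output ports from inside the body.
//
//     using router_t = tbb::flow::multifunction_node<int, std::tuple<int, int>>;
//     tbb::flow::graph g;
//     router_t router(g, tbb::flow::unlimited,
//         [](const int& v, router_t::output_ports_type& ports) {
//             if (v % 2 == 0) std::get<0>(ports).try_put(v);   // evens to port 0
//             else            std::get<1>(ports).try_put(v);   // odds  to port 1
//         });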
+template<typename Input, typename Output, typename Policy = queueing> +class multifunction_node : + public graph_node, + public multifunction_input + < + Input, + typename wrap_tuple_elements< + std::tuple_size<Output>::value, // #elements in tuple + multifunction_output, // wrap this around each element + Output // the tuple providing the types + >::type, + Policy, + cache_aligned_allocator<Input> + > +{ + typedef cache_aligned_allocator<Input> internals_allocator; + +protected: + static const int N = std::tuple_size<Output>::value; +public: + typedef Input input_type; + typedef null_type output_type; + typedef typename wrap_tuple_elements<N,multifunction_output, Output>::type output_ports_type; + typedef multifunction_input< + input_type, output_ports_type, Policy, internals_allocator> input_impl_type; + typedef function_input_queue<input_type, internals_allocator> input_queue_type; +private: + using input_impl_type::my_predecessors; +public: + template<typename Body> + __TBB_NOINLINE_SYM multifunction_node( + graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g), input_impl_type(g, concurrency, body, a_priority) { + fgt_multioutput_node_with_body<N>( + CODEPTR(), FLOW_MULTIFUNCTION_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body + ); + } + + template <typename Body> + __TBB_NOINLINE_SYM multifunction_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) + : multifunction_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority) + : multifunction_node(nodes.graph_reference(), concurrency, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) + : multifunction_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + __TBB_NOINLINE_SYM multifunction_node( const multifunction_node &other) : + graph_node(other.my_graph), input_impl_type(other) { + fgt_multioutput_node_with_body<N>( CODEPTR(), FLOW_MULTIFUNCTION_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body ); + } + + // all the guts are in multifunction_input... +protected: + void reset_node(reset_flags f) override { input_impl_type::reset(f); } +}; // multifunction_node + +//! split_node: accepts a tuple as input, forwards each element of the tuple to its +// successors. The node has unlimited concurrency, so it does not reject inputs. 
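// A hedged usage sketch (the graph and edge targets below are illustrative assumptions):
//
//     tbb::flow::graph g;
//     tbb::flow::split_node<std::tuple<int, float>> s(g);
//     tbb::flow::queue_node<int>   ints(g);
//     tbb::flow::queue_node<float> floats(g);
//     tbb::flow::make_edge(tbb::flow::output_port<0>(s), ints);
//     tbb::flow::make_edge(tbb::flow::output_port<1>(s), floats);
//     s.try_put(std::make_tuple(1, 2.0f));   // 1 goes to `ints`, 2.0f to `floats`
//     g.wait_for_all();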
+template<typename TupleType> +class split_node : public graph_node, public receiver<TupleType> { + static const int N = std::tuple_size<TupleType>::value; + typedef receiver<TupleType> base_type; +public: + typedef TupleType input_type; + typedef typename wrap_tuple_elements< + N, // #elements in tuple + multifunction_output, // wrap this around each element + TupleType // the tuple providing the types + >::type output_ports_type; + + __TBB_NOINLINE_SYM explicit split_node(graph &g) + : graph_node(g), + my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)) + { + fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), this->output_ports()); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM split_node(const node_set<Args...>& nodes) : split_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM split_node(const split_node& other) + : graph_node(other.my_graph), base_type(other), + my_output_ports(init_output_ports<output_ports_type>::call(other.my_graph, my_output_ports)) + { + fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), this->output_ports()); + } + + output_ports_type &output_ports() { return my_output_ports; } + +protected: + graph_task *try_put_task(const TupleType& t) override { + // Sending split messages in parallel is not justified, as overheads would prevail. + // Also, we do not have successors here. So we just tell the task returned here is successful. + return emit_element<N>::emit_this(this->my_graph, t, output_ports()); + } + void reset_node(reset_flags f) override { + if (f & rf_clear_edges) + clear_element<N>::clear_this(my_output_ports); + + __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "split_node reset failed"); + } + graph& graph_reference() const override { + return my_graph; + } + +private: + output_ports_type my_output_ports; +}; + +//! Implements an executable node that supports continue_msg -> Output +template <typename Output, typename Policy = Policy<void> > +class continue_node : public graph_node, public continue_input<Output, Policy>, + public function_output<Output> { +public: + typedef continue_msg input_type; + typedef Output output_type; + typedef continue_input<Output, Policy> input_impl_type; + typedef function_output<output_type> fOutput_type; + typedef typename input_impl_type::predecessor_type predecessor_type; + typedef typename fOutput_type::successor_type successor_type; + + //! Constructor for executable node with continue_msg -> Output + template <typename Body > + __TBB_NOINLINE_SYM continue_node( + graph &g, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g), input_impl_type( g, body, a_priority ), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + continue_node( graph& g, Body body, node_priority_t a_priority ) + : continue_node(g, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... 
Args> + continue_node( const node_set<Args...>& nodes, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority ) + : continue_node(nodes.graph_reference(), body, p, a_priority ) { + make_edges_in_order(nodes, *this); + } + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, Body body, node_priority_t a_priority) + : continue_node(nodes, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + //! Constructor for executable node with continue_msg -> Output + template <typename Body > + __TBB_NOINLINE_SYM continue_node( + graph &g, int number_of_predecessors, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g) + , input_impl_type(g, number_of_predecessors, body, a_priority), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + continue_node( graph& g, int number_of_predecessors, Body body, node_priority_t a_priority) + : continue_node(g, number_of_predecessors, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, int number_of_predecessors, + Body body, Policy p = Policy(), node_priority_t a_priority = no_priority ) + : continue_node(nodes.graph_reference(), number_of_predecessors, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, int number_of_predecessors, + Body body, node_priority_t a_priority ) + : continue_node(nodes, number_of_predecessors, body, Policy(), a_priority) {} +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM continue_node( const continue_node& src ) : + graph_node(src.my_graph), input_impl_type(src), + function_output<Output>(src.my_graph) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + using input_impl_type::try_put_task; + broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } + + void reset_node(reset_flags f) override { + input_impl_type::reset_receiver(f); + if(f & rf_clear_edges)successors().clear(); + __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "continue_node not reset"); + } +}; // continue_node + +//! Forwards messages of type T to all successors +template <typename T> +class broadcast_node : public graph_node, public receiver<T>, public sender<T> { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; +private: + broadcast_cache<input_type> my_successors; +public: + + __TBB_NOINLINE_SYM explicit broadcast_node(graph& g) : graph_node(g), my_successors(this) { + fgt_node( CODEPTR(), FLOW_BROADCAST_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + broadcast_node(const node_set<Args...>& nodes) : broadcast_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM broadcast_node( const broadcast_node& src ) : broadcast_node(src.my_graph) {} + + //! Adds a successor + bool register_successor( successor_type &r ) override { + my_successors.register_successor( r ); + return true; + } + + //! Removes s as a successor + bool remove_successor( successor_type &r ) override { + my_successors.remove_successor( r ); + return true; + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! build a task to run the successor if possible. Default is old behavior. + graph_task *try_put_task(const T& t) override { + graph_task *new_task = my_successors.try_put_task(t); + if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; + return new_task; + } + + graph& graph_reference() const override { + return my_graph; + } + + void reset_node(reset_flags f) override { + if (f&rf_clear_edges) { + my_successors.clear(); + } + __TBB_ASSERT(!(f & rf_clear_edges) || my_successors.empty(), "Error resetting broadcast_node"); + } +}; // broadcast_node + +//! Forwards messages in arbitrary order +template <typename T> +class buffer_node + : public graph_node + , public reservable_item_buffer< T, cache_aligned_allocator<T> > + , public receiver<T>, public sender<T> +{ + typedef cache_aligned_allocator<T> internals_allocator; + +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + typedef buffer_node<T> class_type; + +protected: + typedef size_t size_type; + round_robin_cache< T, null_rw_mutex > my_successors; + + friend class forward_task_bypass< class_type >; + + enum op_type {reg_succ, rem_succ, req_item, res_item, rel_res, con_res, put_item, try_fwd_task + }; + + // implements the aggregator_operation concept + class buffer_operation : public aggregated_operation< buffer_operation > { + public: + char type; + T* elem; + graph_task* ltask; + successor_type *r; + + buffer_operation(const T& e, op_type t) : type(char(t)) + , elem(const_cast<T*>(&e)) , ltask(NULL) + {} + buffer_operation(op_type t) : type(char(t)), ltask(NULL) {} + }; + + bool forwarder_busy; + typedef aggregating_functor<class_type, buffer_operation> handler_type; + friend class aggregating_functor<class_type, buffer_operation>; + aggregator< handler_type, buffer_operation> my_aggregator; + + virtual void handle_operations(buffer_operation *op_list) { + handle_operations_impl(op_list, this); + } + + template<typename derived_type> + void handle_operations_impl(buffer_operation *op_list, derived_type* derived) { + __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); + + buffer_operation *tmp = NULL; + bool try_forwarding = false; + while (op_list) { + tmp = op_list; + op_list = op_list->next; + switch (tmp->type) { + case reg_succ: internal_reg_succ(tmp); try_forwarding = true; break; + case rem_succ: internal_rem_succ(tmp); break; + case req_item: internal_pop(tmp); break; + case res_item: internal_reserve(tmp); break; + case rel_res: internal_release(tmp); try_forwarding = true; break; + case con_res: internal_consume(tmp); try_forwarding = true; break; + case put_item: 
try_forwarding = internal_push(tmp); break; + case try_fwd_task: internal_forward_task(tmp); break; + } + } + + derived->order(); + + if (try_forwarding && !forwarder_busy) { + if(is_graph_active(this->my_graph)) { + forwarder_busy = true; + typedef forward_task_bypass<class_type> task_type; + small_object_allocator allocator{}; + graph_task* new_task = allocator.new_object<task_type>(graph_reference(), allocator, *this); + my_graph.reserve_wait(); + // tmp should point to the last item handled by the aggregator. This is the operation + // the handling thread enqueued. So modifying that record will be okay. + // TODO revamp: check that the issue is still present + // workaround for icc bug (at least 12.0 and 13.0) + // error: function "tbb::flow::interfaceX::combine_tasks" cannot be called with the given argument list + // argument types are: (graph, graph_task *, graph_task *) + graph_task *z = tmp->ltask; + graph &g = this->my_graph; + tmp->ltask = combine_tasks(g, z, new_task); // in case the op generated a task + } + } + } // handle_operations + + inline graph_task *grab_forwarding_task( buffer_operation &op_data) { + return op_data.ltask; + } + + inline bool enqueue_forwarding_task(buffer_operation &op_data) { + graph_task *ft = grab_forwarding_task(op_data); + if(ft) { + spawn_in_graph_arena(graph_reference(), *ft); + return true; + } + return false; + } + + //! This is executed by an enqueued task, the "forwarder" + virtual graph_task *forward_task() { + buffer_operation op_data(try_fwd_task); + graph_task *last_task = NULL; + do { + op_data.status = WAIT; + op_data.ltask = NULL; + my_aggregator.execute(&op_data); + + // workaround for icc bug + graph_task *xtask = op_data.ltask; + graph& g = this->my_graph; + last_task = combine_tasks(g, last_task, xtask); + } while (op_data.status ==SUCCEEDED); + return last_task; + } + + //! Register successor + virtual void internal_reg_succ(buffer_operation *op) { + my_successors.register_successor(*(op->r)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + + //! Remove successor + virtual void internal_rem_succ(buffer_operation *op) { + my_successors.remove_successor(*(op->r)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +private: + void order() {} + + bool is_item_valid() { + return this->my_item_valid(this->my_tail - 1); + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task *new_task = my_successors.try_put_task(this->back()); + if (new_task) { + // workaround for icc bug + graph& g = this->my_graph; + last_task = combine_tasks(g, last_task, new_task); + this->destroy_back(); + } + } + +protected: + //! 
Tries to forward valid items to successors + virtual void internal_forward_task(buffer_operation *op) { + internal_forward_task_impl(op, this); + } + + template<typename derived_type> + void internal_forward_task_impl(buffer_operation *op, derived_type* derived) { + __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); + + if (this->my_reserved || !derived->is_item_valid()) { + op->status.store(FAILED, std::memory_order_release); + this->forwarder_busy = false; + return; + } + // Try forwarding, giving each successor a chance + graph_task* last_task = NULL; + size_type counter = my_successors.size(); + for (; counter > 0 && derived->is_item_valid(); --counter) + derived->try_put_and_add_task(last_task); + + op->ltask = last_task; // return task + if (last_task && !counter) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + forwarder_busy = false; + } + } + + virtual bool internal_push(buffer_operation *op) { + this->push_back(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + return true; + } + + virtual void internal_pop(buffer_operation *op) { + if(this->pop_back(*(op->elem))) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + } + } + + virtual void internal_reserve(buffer_operation *op) { + if(this->reserve_front(*(op->elem))) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + } + } + + virtual void internal_consume(buffer_operation *op) { + this->consume_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + + virtual void internal_release(buffer_operation *op) { + this->release_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +public: + //! Constructor + __TBB_NOINLINE_SYM explicit buffer_node( graph &g ) + : graph_node(g), reservable_item_buffer<T, internals_allocator>(), receiver<T>(), + sender<T>(), my_successors(this), forwarder_busy(false) + { + my_aggregator.initialize_handler(handler_type(this)); + fgt_node( CODEPTR(), FLOW_BUFFER_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + buffer_node(const node_set<Args...>& nodes) : buffer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM buffer_node( const buffer_node& src ) : buffer_node(src.my_graph) {} + + // + // message sender implementation + // + + //! Adds a new successor. + /** Adds successor r to the list of successors; may forward tasks. */ + bool register_successor( successor_type &r ) override { + buffer_operation op_data(reg_succ); + op_data.r = &r; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Removes a successor. + /** Removes successor r from the list of successors. + It also calls r.remove_predecessor(*this) to remove this node as a predecessor. 
*/ + bool remove_successor( successor_type &r ) override { + // TODO revamp: investigate why full qualification is necessary here + tbb::detail::d1::remove_predecessor(r, *this); + buffer_operation op_data(rem_succ); + op_data.r = &r; + my_aggregator.execute(&op_data); + // even though this operation does not cause a forward, if we are the handler, and + // a forward is scheduled, we may be the first to reach this point after the aggregator, + // and so should check for the task. + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Request an item from the buffer_node + /** true = v contains the returned item<BR> + false = no item has been returned */ + bool try_get( T &v ) override { + buffer_operation op_data(req_item); + op_data.elem = &v; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return (op_data.status==SUCCEEDED); + } + + //! Reserves an item. + /** false = no item can be reserved<BR> + true = an item is reserved */ + bool try_reserve( T &v ) override { + buffer_operation op_data(res_item); + op_data.elem = &v; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return (op_data.status==SUCCEEDED); + } + + //! Release a reserved item. + /** true = item has been released and so remains in sender */ + bool try_release() override { + buffer_operation op_data(rel_res); + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Consumes a reserved item. + /** true = item is removed from sender and reservation removed */ + bool try_consume() override { + buffer_operation op_data(con_res); + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! receive an item, return a task *if possible + graph_task *try_put_task(const T &t) override { + buffer_operation op_data(t, put_item); + my_aggregator.execute(&op_data); + graph_task *ft = grab_forwarding_task(op_data); + // sequencer_nodes can return failure (if an item has been previously inserted) + // We have to spawn the returned task if our own operation fails. + + if(ft && op_data.status ==FAILED) { + // we haven't succeeded queueing the item, but for some reason the + // call returned a task (if another request resulted in a successful + // forward this could happen.) Queue the task and reset the pointer. + spawn_in_graph_arena(graph_reference(), *ft); ft = NULL; + } + else if(!ft && op_data.status ==SUCCEEDED) { + ft = SUCCESSFULLY_ENQUEUED; + } + return ft; + } + + graph& graph_reference() const override { + return my_graph; + } + +protected: + void reset_node( reset_flags f) override { + reservable_item_buffer<T, internals_allocator>::reset(); + // TODO: just clear structures + if (f&rf_clear_edges) { + my_successors.clear(); + } + forwarder_busy = false; + } +}; // buffer_node + +//! 
Forwards messages in FIFO order +template <typename T> +class queue_node : public buffer_node<T> { +protected: + typedef buffer_node<T> base_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::buffer_operation queue_operation; + typedef queue_node class_type; + +private: + template<typename> friend class buffer_node; + + bool is_item_valid() { + return this->my_item_valid(this->my_head); + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task *new_task = this->my_successors.try_put_task(this->front()); + if (new_task) { + // workaround for icc bug + graph& graph_ref = this->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); + this->destroy_front(); + } + } + +protected: + void internal_forward_task(queue_operation *op) override { + this->internal_forward_task_impl(op, this); + } + + void internal_pop(queue_operation *op) override { + if ( this->my_reserved || !this->my_item_valid(this->my_head)){ + op->status.store(FAILED, std::memory_order_release); + } + else { + this->pop_front(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + } + void internal_reserve(queue_operation *op) override { + if (this->my_reserved || !this->my_item_valid(this->my_head)) { + op->status.store(FAILED, std::memory_order_release); + } + else { + this->reserve_front(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + } + void internal_consume(queue_operation *op) override { + this->consume_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit queue_node( graph &g ) : base_type(g) { + fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + queue_node( const node_set<Args...>& nodes) : queue_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM queue_node( const queue_node& src) : base_type(src) { + fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + + +protected: + void reset_node( reset_flags f) override { + base_type::reset_node(f); + } +}; // queue_node + +//! Forwards messages in sequence order +template <typename T> +class sequencer_node : public queue_node<T> { + function_body< T, size_t > *my_sequencer; + // my_sequencer should be a benign function and must be callable + // from a parallel context. Does this mean it needn't be reset? +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! 
Constructor + template< typename Sequencer > + __TBB_NOINLINE_SYM sequencer_node( graph &g, const Sequencer& s ) : queue_node<T>(g), + my_sequencer(new function_body_leaf< T, size_t, Sequencer>(s) ) { + fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Sequencer, typename... Args> + sequencer_node( const node_set<Args...>& nodes, const Sequencer& s) + : sequencer_node(nodes.graph_reference(), s) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM sequencer_node( const sequencer_node& src ) : queue_node<T>(src), + my_sequencer( src.my_sequencer->clone() ) { + fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + + //! Destructor + ~sequencer_node() { delete my_sequencer; } + +protected: + typedef typename buffer_node<T>::size_type size_type; + typedef typename buffer_node<T>::buffer_operation sequencer_operation; + +private: + bool internal_push(sequencer_operation *op) override { + size_type tag = (*my_sequencer)(*(op->elem)); +#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES + if (tag < this->my_head) { + // have already emitted a message with this tag + op->status.store(FAILED, std::memory_order_release); + return false; + } +#endif + // cannot modify this->my_tail now; the buffer would be inconsistent. + size_t new_tail = (tag+1 > this->my_tail) ? tag+1 : this->my_tail; + + if (this->size(new_tail) > this->capacity()) { + this->grow_my_array(this->size(new_tail)); + } + this->my_tail = new_tail; + + const op_stat res = this->place_item(tag, *(op->elem)) ? SUCCEEDED : FAILED; + op->status.store(res, std::memory_order_release); + return res ==SUCCEEDED; + } +}; // sequencer_node + +//! Forwards messages in priority order +template<typename T, typename Compare = std::less<T>> +class priority_queue_node : public buffer_node<T> { +public: + typedef T input_type; + typedef T output_type; + typedef buffer_node<T> base_type; + typedef priority_queue_node class_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit priority_queue_node( graph &g, const Compare& comp = Compare() ) + : buffer_node<T>(g), compare(comp), mark(0) { + fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + priority_queue_node(const node_set<Args...>& nodes, const Compare& comp = Compare()) + : priority_queue_node(nodes.graph_reference(), comp) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM priority_queue_node( const priority_queue_node &src ) + : buffer_node<T>(src), mark(0) + { + fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +protected: + + void reset_node( reset_flags f) override { + mark = 0; + base_type::reset_node(f); + } + + typedef typename buffer_node<T>::size_type size_type; + typedef typename buffer_node<T>::item_type item_type; + typedef typename buffer_node<T>::buffer_operation prio_operation; + + //! 
Tries to forward valid items to successors + void internal_forward_task(prio_operation *op) override { + this->internal_forward_task_impl(op, this); + } + + void handle_operations(prio_operation *op_list) override { + this->handle_operations_impl(op_list, this); + } + + bool internal_push(prio_operation *op) override { + prio_push(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + return true; + } + + void internal_pop(prio_operation *op) override { + // if empty or already reserved, don't pop + if ( this->my_reserved == true || this->my_tail == 0 ) { + op->status.store(FAILED, std::memory_order_release); + return; + } + + *(op->elem) = prio(); + op->status.store(SUCCEEDED, std::memory_order_release); + prio_pop(); + + } + + // pops the highest-priority item, saves copy + void internal_reserve(prio_operation *op) override { + if (this->my_reserved == true || this->my_tail == 0) { + op->status.store(FAILED, std::memory_order_release); + return; + } + this->my_reserved = true; + *(op->elem) = prio(); + reserved_item = *(op->elem); + op->status.store(SUCCEEDED, std::memory_order_release); + prio_pop(); + } + + void internal_consume(prio_operation *op) override { + op->status.store(SUCCEEDED, std::memory_order_release); + this->my_reserved = false; + reserved_item = input_type(); + } + + void internal_release(prio_operation *op) override { + op->status.store(SUCCEEDED, std::memory_order_release); + prio_push(reserved_item); + this->my_reserved = false; + reserved_item = input_type(); + } + +private: + template<typename> friend class buffer_node; + + void order() { + if (mark < this->my_tail) heapify(); + __TBB_ASSERT(mark == this->my_tail, "mark unequal after heapify"); + } + + bool is_item_valid() { + return this->my_tail > 0; + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task * new_task = this->my_successors.try_put_task(this->prio()); + if (new_task) { + // workaround for icc bug + graph& graph_ref = this->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); + prio_pop(); + } + } + +private: + Compare compare; + size_type mark; + + input_type reserved_item; + + // in case a reheap has not been done after a push, check if the mark item is higher than the 0'th item + bool prio_use_tail() { + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds before test"); + return mark < this->my_tail && compare(this->get_my_item(0), this->get_my_item(this->my_tail - 1)); + } + + // prio_push: checks that the item will fit, expand array if necessary, put at end + void prio_push(const T &src) { + if ( this->my_tail >= this->my_array_size ) + this->grow_my_array( this->my_tail + 1 ); + (void) this->place_item(this->my_tail, src); + ++(this->my_tail); + __TBB_ASSERT(mark < this->my_tail, "mark outside bounds after push"); + } + + // prio_pop: deletes highest priority item from the array, and if it is item + // 0, move last item to 0 and reheap. If end of array, just destroy and decrement tail + // and mark. Assumes the array has already been tested for emptiness; no failure. 
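    // Descriptive note on the buffer layout used by prio_push/prio_pop/heapify below: items at
    // indices [0, mark) form a binary heap ordered by Compare with the highest-priority element
    // at index 0, while items at [mark, my_tail) were pushed since the last heapify() and are
    // still unordered.  order() re-heapifies before forwarding, and prio()/prio_pop() consult
    // prio_use_tail() so that the most recently pushed item, if it outranks the current root,
    // is served without waiting for a reheap.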
+ void prio_pop() { + if (prio_use_tail()) { + // there are newly pushed elements; last one higher than top + // copy the data + this->destroy_item(this->my_tail-1); + --(this->my_tail); + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); + return; + } + this->destroy_item(0); + if(this->my_tail > 1) { + // push the last element down heap + __TBB_ASSERT(this->my_item_valid(this->my_tail - 1), NULL); + this->move_item(0,this->my_tail - 1); + } + --(this->my_tail); + if(mark > this->my_tail) --mark; + if (this->my_tail > 1) // don't reheap for heap of size 1 + reheap(); + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); + } + + const T& prio() { + return this->get_my_item(prio_use_tail() ? this->my_tail-1 : 0); + } + + // turn array into heap + void heapify() { + if(this->my_tail == 0) { + mark = 0; + return; + } + if (!mark) mark = 1; + for (; mark<this->my_tail; ++mark) { // for each unheaped element + size_type cur_pos = mark; + input_type to_place; + this->fetch_item(mark,to_place); + do { // push to_place up the heap + size_type parent = (cur_pos-1)>>1; + if (!compare(this->get_my_item(parent), to_place)) + break; + this->move_item(cur_pos, parent); + cur_pos = parent; + } while( cur_pos ); + (void) this->place_item(cur_pos, to_place); + } + } + + // otherwise heapified array with new root element; rearrange to heap + void reheap() { + size_type cur_pos=0, child=1; + while (child < mark) { + size_type target = child; + if (child+1<mark && + compare(this->get_my_item(child), + this->get_my_item(child+1))) + ++target; + // target now has the higher priority child + if (compare(this->get_my_item(target), + this->get_my_item(cur_pos))) + break; + // swap + this->swap_items(cur_pos, target); + cur_pos = target; + child = (cur_pos<<1)+1; + } + } +}; // priority_queue_node + +//! Forwards messages only if the threshold has not been reached +/** This node forwards items until its threshold is reached. + It contains no buffering. If the downstream node rejects, the + message is dropped. */ +template< typename T, typename DecrementType=continue_msg > +class limiter_node : public graph_node, public receiver< T >, public sender< T > { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + //TODO: There is a lack of predefined types for its controlling "decrementer" port. It should be fixed later. + +private: + size_t my_threshold; + size_t my_count; // number of successful puts + size_t my_tries; // number of active put attempts + reservable_predecessor_cache< T, spin_mutex > my_predecessors; + spin_mutex my_mutex; + broadcast_cache< T > my_successors; + + //! 
The internal receiver< DecrementType > that adjusts the count + threshold_regulator< limiter_node<T, DecrementType>, DecrementType > decrement; + + graph_task* decrement_counter( long long delta ) { + { + spin_mutex::scoped_lock lock(my_mutex); + if( delta > 0 && size_t(delta) > my_count ) + my_count = 0; + else if( delta < 0 && size_t(delta) > my_threshold - my_count ) + my_count = my_threshold; + else + my_count -= size_t(delta); // absolute value of delta is sufficiently small + } + return forward_task(); + } + + // Let threshold_regulator call decrement_counter() + friend class threshold_regulator< limiter_node<T, DecrementType>, DecrementType >; + + friend class forward_task_bypass< limiter_node<T,DecrementType> >; + + bool check_conditions() { // always called under lock + return ( my_count + my_tries < my_threshold && !my_predecessors.empty() && !my_successors.empty() ); + } + + // only returns a valid task pointer or NULL, never SUCCESSFULLY_ENQUEUED + graph_task* forward_task() { + input_type v; + graph_task* rval = NULL; + bool reserved = false; + { + spin_mutex::scoped_lock lock(my_mutex); + if ( check_conditions() ) + ++my_tries; + else + return NULL; + } + + //SUCCESS + // if we can reserve and can put, we consume the reservation + // we increment the count and decrement the tries + if ( (my_predecessors.try_reserve(v)) == true ){ + reserved=true; + if ( (rval = my_successors.try_put_task(v)) != NULL ){ + { + spin_mutex::scoped_lock lock(my_mutex); + ++my_count; + --my_tries; + my_predecessors.try_consume(); + if ( check_conditions() ) { + if ( is_graph_active(this->my_graph) ) { + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + small_object_allocator allocator{}; + graph_task* rtask = allocator.new_object<task_type>( my_graph, allocator, *this ); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *rtask); + } + } + } + return rval; + } + } + //FAILURE + //if we can't reserve, we decrement the tries + //if we can reserve but can't put, we decrement the tries and release the reservation + { + spin_mutex::scoped_lock lock(my_mutex); + --my_tries; + if (reserved) my_predecessors.try_release(); + if ( check_conditions() ) { + if ( is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + __TBB_ASSERT(!rval, "Have two tasks to handle"); + return t; + } + } + return rval; + } + } + + void initialize() { + fgt_node( + CODEPTR(), FLOW_LIMITER_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<receiver<DecrementType> *>(&decrement), + static_cast<sender<output_type> *>(this) + ); + } + +public: + //! Constructor + limiter_node(graph &g, size_t threshold) + : graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), my_predecessors(this) + , my_successors(this), decrement(this) + { + initialize(); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + limiter_node(const node_set<Args...>& nodes, size_t threshold) + : limiter_node(nodes.graph_reference(), threshold) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + limiter_node( const limiter_node& src ) : limiter_node(src.my_graph, src.my_threshold) {} + + //! The interface for accessing internal receiver< DecrementType > that adjusts the count + receiver<DecrementType>& decrementer() { return decrement; } + + //! 
Replace the current successor with this new successor + bool register_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + bool was_empty = my_successors.empty(); + my_successors.register_successor(r); + //spawn a forward task if this is the only successor + if ( was_empty && !my_predecessors.empty() && my_count + my_tries < my_threshold ) { + if ( is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *t); + } + } + return true; + } + + //! Removes a successor from this node + /** r.remove_predecessor(*this) is also called. */ + bool remove_successor( successor_type &r ) override { + // TODO revamp: investigate why qualification is needed for remove_predecessor() call + tbb::detail::d1::remove_predecessor(r, *this); + my_successors.remove_successor(r); + return true; + } + + //! Adds src to the list of cached predecessors. + bool register_predecessor( predecessor_type &src ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_predecessors.add( src ); + if ( my_count + my_tries < my_threshold && !my_successors.empty() && is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *t); + } + return true; + } + + //! Removes src from the list of cached predecessors. + bool remove_predecessor( predecessor_type &src ) override { + my_predecessors.remove( src ); + return true; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! Puts an item to this receiver + graph_task* try_put_task( const T &t ) override { + { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_count + my_tries >= my_threshold ) + return NULL; + else + ++my_tries; + } + + graph_task* rtask = my_successors.try_put_task(t); + + if ( !rtask ) { // try_put_task failed. 
+ spin_mutex::scoped_lock lock(my_mutex); + --my_tries; + if (check_conditions() && is_graph_active(this->my_graph)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + rtask = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + } + } + else { + spin_mutex::scoped_lock lock(my_mutex); + ++my_count; + --my_tries; + } + return rtask; + } + + graph& graph_reference() const override { return my_graph; } + + void reset_node( reset_flags f) override { + my_count = 0; + if(f & rf_clear_edges) { + my_predecessors.clear(); + my_successors.clear(); + } + else + { + my_predecessors.reset( ); + } + decrement.reset_receiver(f); + } +}; // limiter_node + +#include "detail/_flow_graph_join_impl.h" + +template<typename OutputTuple, typename JP=queueing> class join_node; + +template<typename OutputTuple> +class join_node<OutputTuple,reserving>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, reserving_port, OutputTuple, reserving> { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, reserving_port, OutputTuple, reserving> unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, reserving = reserving()) : join_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename OutputTuple> +class join_node<OutputTuple,queueing>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, queueing_port, OutputTuple, queueing> { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, queueing_port, OutputTuple, queueing> unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, queueing = queueing()) : join_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +// template for key_matching join_node +// tag_matching join_node is a specialization of key_matching, and is source-compatible. 
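The limiter_node restored above throttles message flow: it counts items in flight against its threshold, rejects puts that arrive while it is full, and pulls new work from reservable predecessors whenever its decrementer port is signalled. A minimal usage sketch of that public API, not a definitive program; it assumes the oneTBB headers and runtime are available, and the node layout and constants are purely illustrative:

    #include "oneapi/tbb/flow_graph.h"
    #include <iostream>

    int main() {
        using namespace tbb::flow;
        graph g;
        buffer_node<int> backlog(g);          // reservable predecessor the limiter can pull from
        limiter_node<int> limiter(g, 3);      // at most 3 items in flight at a time
        function_node<int, continue_msg> worker(g, serial, [](int v) {
            std::cout << "processing " << v << "\n";
            return continue_msg();            // signals completion of one item
        });
        make_edge(backlog, limiter);
        make_edge(limiter, worker);
        make_edge(worker, limiter.decrementer());  // each completion frees one slot
        for (int i = 0; i < 10; ++i)
            backlog.try_put(i);
        g.wait_for_all();
        return 0;
    }

The decrementer edge is what keeps such a pipeline moving: without it the limiter stops forwarding after the first three items, because nothing ever lowers its internal count.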
+template<typename OutputTuple, typename K, typename KHash> +class join_node<OutputTuple, key_matching<K, KHash> > : public unfolded_join_node<std::tuple_size<OutputTuple>::value, + key_matching_port, OutputTuple, key_matching<K,KHash> > { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, key_matching_port, OutputTuple, key_matching<K,KHash> > unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + join_node(graph &g) : unfolded_type(g) {} +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + + template<typename __TBB_B0, typename __TBB_B1> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1) : unfolded_type(g, b0, b1) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2) : unfolded_type(g, b0, b1, b2) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3) : unfolded_type(g, b0, b1, b2, b3) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4) : + unfolded_type(g, b0, b1, b2, b3, b4) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#if __TBB_VARIADIC_MAX >= 6 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5) : + unfolded_type(g, b0, b1, b2, b3, b4, b5) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 7 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6) : + unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 8 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { + fgt_multiinput_node<N>( CODEPTR(), 
FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 9 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7, __TBB_B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 10 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8, typename __TBB_B9> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7, __TBB_B8 b8, __TBB_B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template < +#if (__clang_major__ == 3 && __clang_minor__ == 4) + // clang 3.4 misdeduces 'Args...' for 'node_set' while it can cope with template template parameter. + template<typename...> class node_set, +#endif + typename... Args, typename... Bodies + > + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, Bodies... bodies) + : join_node(nodes.graph_reference(), bodies...) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +// indexer node +#include "detail/_flow_graph_indexer_impl.h" + +// TODO: Implement interface with variadic template or tuple +template<typename T0, typename T1=null_type, typename T2=null_type, typename T3=null_type, + typename T4=null_type, typename T5=null_type, typename T6=null_type, + typename T7=null_type, typename T8=null_type, typename T9=null_type> class indexer_node; + +//indexer node specializations +template<typename T0> +class indexer_node<T0> : public unfolded_indexer_node<std::tuple<T0> > { +private: + static const int N = 1; +public: + typedef std::tuple<T0> InputTuple; + typedef tagged_msg<size_t, T0> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +}; + +template<typename T0, typename T1> +class indexer_node<T0, T1> : public unfolded_indexer_node<std::tuple<T0, T1> > { +private: + static const int N = 2; +public: + typedef std::tuple<T0, T1> InputTuple; + typedef tagged_msg<size_t, T0, T1> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2> +class indexer_node<T0, T1, T2> : public unfolded_indexer_node<std::tuple<T0, T1, T2> > { +private: + static const int N = 3; +public: + typedef std::tuple<T0, T1, T2> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2, typename T3> +class indexer_node<T0, T1, T2, T3> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3> > { +private: + static const int N = 4; +public: + typedef std::tuple<T0, T1, T2, T3> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2, typename T3, typename T4> +class indexer_node<T0, T1, T2, T3, T4> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4> > { +private: + static const int N = 5; +public: + typedef std::tuple<T0, T1, T2, T3, T4> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +#if __TBB_VARIADIC_MAX >= 6 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> +class indexer_node<T0, T1, T2, T3, T4, T5> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5> > { +private: + static const int N = 6; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 6 + +#if __TBB_VARIADIC_MAX >= 7 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +class indexer_node<T0, T1, T2, T3, T4, T5, T6> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6> > { +private: + static const int N = 7; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 7 + +#if __TBB_VARIADIC_MAX >= 8 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> > { +private: + static const int N = 8; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 8 + +#if __TBB_VARIADIC_MAX >= 9 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7, T8> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> > { +private: + static const int N = 9; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 9 + +#if __TBB_VARIADIC_MAX >= 10 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +class indexer_node/*default*/ : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> > { +private: + static const int N = 10; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 10 + +template< typename T > +inline void internal_make_edge( sender<T> &p, receiver<T> &s ) { + register_successor(p, s); + fgt_make_edge( &p, &s ); +} + +//! Makes an edge between a single predecessor and a single successor +template< typename T > +inline void make_edge( sender<T> &p, receiver<T> &s ) { + internal_make_edge( p, s ); +} + +//Makes an edge from port 0 of a multi-output predecessor to port 0 of a multi-input successor. +template< typename T, typename V, + typename = typename T::output_ports_type, typename = typename V::input_ports_type > +inline void make_edge( T& output, V& input) { + make_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); +} + +//Makes an edge from port 0 of a multi-output predecessor to a receiver. +template< typename T, typename R, + typename = typename T::output_ports_type > +inline void make_edge( T& output, receiver<R>& input) { + make_edge(std::get<0>(output.output_ports()), input); +} + +//Makes an edge from a sender to port 0 of a multi-input successor. +template< typename S, typename V, + typename = typename V::input_ports_type > +inline void make_edge( sender<S>& output, V& input) { + make_edge(output, std::get<0>(input.input_ports())); +} + +template< typename T > +inline void internal_remove_edge( sender<T> &p, receiver<T> &s ) { + remove_successor( p, s ); + fgt_remove_edge( &p, &s ); +} + +//! Removes an edge between a single predecessor and a single successor +template< typename T > +inline void remove_edge( sender<T> &p, receiver<T> &s ) { + internal_remove_edge( p, s ); +} + +//Removes an edge between port 0 of a multi-output predecessor and port 0 of a multi-input successor. 
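The indexer_node specializations above tag each incoming message with the index of the port it arrived on, and the make_edge / remove_edge helpers connect single senders and receivers as well as port 0 of multi-port nodes. A minimal sketch of how they combine, assuming the oneTBB headers and runtime; the concrete types and values are illustrative only:

    #include "oneapi/tbb/flow_graph.h"
    #include <iostream>

    int main() {
        using namespace tbb::flow;
        graph g;
        indexer_node<int, float> idx(g);
        using msg_t = indexer_node<int, float>::output_type;   // tagged_msg<size_t, int, float>
        function_node<msg_t> sink(g, unlimited, [](const msg_t& m) {
            if (is_a<int>(m))
                std::cout << "int arrived: " << cast_to<int>(m) << "\n";
            else
                std::cout << "float arrived: " << cast_to<float>(m) << "\n";
        });
        make_edge(idx, sink);             // the indexer's single output feeds the sink
        input_port<0>(idx).try_put(3);
        input_port<1>(idx).try_put(7.5f);
        g.wait_for_all();
        remove_edge(idx, sink);           // edges are removed with the matching overload
        return 0;
    }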
+template< typename T, typename V, + typename = typename T::output_ports_type, typename = typename V::input_ports_type > +inline void remove_edge( T& output, V& input) { + remove_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); +} + +//Removes an edge between port 0 of a multi-output predecessor and a receiver. +template< typename T, typename R, + typename = typename T::output_ports_type > +inline void remove_edge( T& output, receiver<R>& input) { + remove_edge(std::get<0>(output.output_ports()), input); +} +//Removes an edge between a sender and port 0 of a multi-input successor. +template< typename S, typename V, + typename = typename V::input_ports_type > +inline void remove_edge( sender<S>& output, V& input) { + remove_edge(output, std::get<0>(input.input_ports())); +} + +//! Returns a copy of the body from a function or continue node +template< typename Body, typename Node > +Body copy_body( Node &n ) { + return n.template copy_function_object<Body>(); +} + +//composite_node +template< typename InputTuple, typename OutputTuple > class composite_node; + +template< typename... InputTypes, typename... OutputTypes> +class composite_node <std::tuple<InputTypes...>, std::tuple<OutputTypes...> > : public graph_node { + +public: + typedef std::tuple< receiver<InputTypes>&... > input_ports_type; + typedef std::tuple< sender<OutputTypes>&... > output_ports_type; + +private: + std::unique_ptr<input_ports_type> my_input_ports; + std::unique_ptr<output_ports_type> my_output_ports; + + static const size_t NUM_INPUTS = sizeof...(InputTypes); + static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + composite_node( graph &g ) : graph_node(g) { + fgt_multiinput_multioutput_node( CODEPTR(), FLOW_COMPOSITE_NODE, this, &this->my_graph ); + } + + template<typename T1, typename T2> + void set_external_ports(T1&& input_ports_tuple, T2&& output_ports_tuple) { + static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); + static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); + + fgt_internal_input_alias_helper<T1, NUM_INPUTS>::alias_port( this, input_ports_tuple); + fgt_internal_output_alias_helper<T2, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); + + my_input_ports.reset( new input_ports_type(std::forward<T1>(input_ports_tuple)) ); + my_output_ports.reset( new output_ports_type(std::forward<T2>(output_ports_tuple)) ); + } + + template< typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template< typename... NodeTypes > + void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + input_ports_type& input_ports() { + __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); + return *my_input_ports; + } + + output_ports_type& output_ports() { + __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); + return *my_output_ports; + } +}; // class composite_node + +//composite_node with only input ports +template< typename... InputTypes> +class composite_node <std::tuple<InputTypes...>, std::tuple<> > : public graph_node { +public: + typedef std::tuple< receiver<InputTypes>&... 
> input_ports_type; + +private: + std::unique_ptr<input_ports_type> my_input_ports; + static const size_t NUM_INPUTS = sizeof...(InputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + composite_node( graph &g ) : graph_node(g) { + fgt_composite( CODEPTR(), this, &g ); + } + + template<typename T> + void set_external_ports(T&& input_ports_tuple) { + static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); + + fgt_internal_input_alias_helper<T, NUM_INPUTS>::alias_port( this, input_ports_tuple); + + my_input_ports.reset( new input_ports_type(std::forward<T>(input_ports_tuple)) ); + } + + template< typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template< typename... NodeTypes > + void add_nodes( const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + input_ports_type& input_ports() { + __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); + return *my_input_ports; + } + +}; // class composite_node + +//composite_nodes with only output_ports +template<typename... OutputTypes> +class composite_node <std::tuple<>, std::tuple<OutputTypes...> > : public graph_node { +public: + typedef std::tuple< sender<OutputTypes>&... > output_ports_type; + +private: + std::unique_ptr<output_ports_type> my_output_ports; + static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + __TBB_NOINLINE_SYM composite_node( graph &g ) : graph_node(g) { + fgt_composite( CODEPTR(), this, &g ); + } + + template<typename T> + void set_external_ports(T&& output_ports_tuple) { + static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); + + fgt_internal_output_alias_helper<T, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); + + my_output_ports.reset( new output_ports_type(std::forward<T>(output_ports_tuple)) ); + } + + template<typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template<typename... NodeTypes > + void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + output_ports_type& output_ports() { + __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); + return *my_output_ports; + } + +}; // class composite_node + +template<typename Gateway> +class async_body_base: no_assign { +public: + typedef Gateway gateway_type; + + async_body_base(gateway_type *gateway): my_gateway(gateway) { } + void set_gateway(gateway_type *gateway) { + my_gateway = gateway; + } + +protected: + gateway_type *my_gateway; +}; + +template<typename Input, typename Ports, typename Gateway, typename Body> +class async_body: public async_body_base<Gateway> { +public: + typedef async_body_base<Gateway> base_type; + typedef Gateway gateway_type; + + async_body(const Body &body, gateway_type *gateway) + : base_type(gateway), my_body(body) { } + + void operator()( const Input &v, Ports & ) { + my_body(v, *this->my_gateway); + } + + Body get_body() { return my_body; } + +private: + Body my_body; +}; + +//! 
Implements async node +template < typename Input, typename Output, + typename Policy = queueing_lightweight > +class async_node + : public multifunction_node< Input, std::tuple< Output >, Policy >, public sender< Output > +{ + typedef multifunction_node< Input, std::tuple< Output >, Policy > base_type; + typedef multifunction_input< + Input, typename base_type::output_ports_type, Policy, cache_aligned_allocator<Input>> mfn_input_type; + +public: + typedef Input input_type; + typedef Output output_type; + typedef receiver<input_type> receiver_type; + typedef receiver<output_type> successor_type; + typedef sender<input_type> predecessor_type; + typedef receiver_gateway<output_type> gateway_type; + typedef async_body_base<gateway_type> async_body_base_type; + typedef typename base_type::output_ports_type output_ports_type; + +private: + class receiver_gateway_impl: public receiver_gateway<Output> { + public: + receiver_gateway_impl(async_node* node): my_node(node) {} + void reserve_wait() override { + fgt_async_reserve(static_cast<typename async_node::receiver_type *>(my_node), &my_node->my_graph); + my_node->my_graph.reserve_wait(); + } + + void release_wait() override { + async_node* n = my_node; + graph* g = &n->my_graph; + g->release_wait(); + fgt_async_commit(static_cast<typename async_node::receiver_type *>(n), g); + } + + //! Implements gateway_type::try_put for an external activity to submit a message to FG + bool try_put(const Output &i) override { + return my_node->try_put_impl(i); + } + + private: + async_node* my_node; + } my_gateway; + + //The substitute of 'this' for member construction, to prevent compiler warnings + async_node* self() { return this; } + + //! Implements gateway_type::try_put for an external activity to submit a message to FG + bool try_put_impl(const Output &i) { + multifunction_output<Output> &port_0 = output_port<0>(*this); + broadcast_cache<output_type>& port_successors = port_0.successors(); + fgt_async_try_put_begin(this, &port_0); + // TODO revamp: change to std::list<graph_task*> + graph_task_list tasks; + bool is_at_least_one_put_successful = port_successors.gather_successful_try_puts(i, tasks); + __TBB_ASSERT( is_at_least_one_put_successful || tasks.empty(), + "Return status is inconsistent with the method operation." ); + + while( !tasks.empty() ) { + enqueue_in_graph_arena(this->my_graph, tasks.pop_front()); + } + fgt_async_try_put_end(this, &port_0); + return is_at_least_one_put_successful; + } + +public: + template<typename Body> + __TBB_NOINLINE_SYM async_node( + graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : base_type( + g, concurrency, + async_body<Input, typename base_type::output_ports_type, gateway_type, Body> + (body, &my_gateway), a_priority ), my_gateway(self()) { + fgt_multioutput_node_with_body<1>( + CODEPTR(), FLOW_ASYNC_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body + ); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM async_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) + : async_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... 
Args> + __TBB_NOINLINE_SYM async_node( + const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy = Policy(), node_priority_t a_priority = no_priority ) + : async_node(nodes.graph_reference(), concurrency, body, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM async_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) + : async_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + __TBB_NOINLINE_SYM async_node( const async_node &other ) : base_type(other), sender<Output>(), my_gateway(self()) { + static_cast<async_body_base_type*>(this->my_body->get_body_ptr())->set_gateway(&my_gateway); + static_cast<async_body_base_type*>(this->my_init_body->get_body_ptr())->set_gateway(&my_gateway); + + fgt_multioutput_node_with_body<1>( CODEPTR(), FLOW_ASYNC_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body ); + } + + gateway_type& gateway() { + return my_gateway; + } + + // Define sender< Output > + + //! Add a new successor to this node + bool register_successor(successor_type&) override { + __TBB_ASSERT(false, "Successors must be registered only via ports"); + return false; + } + + //! Removes a successor from this node + bool remove_successor(successor_type&) override { + __TBB_ASSERT(false, "Successors must be removed only via ports"); + return false; + } + + template<typename Body> + Body copy_function_object() { + typedef multifunction_body<input_type, typename base_type::output_ports_type> mfn_body_type; + typedef async_body<Input, typename base_type::output_ports_type, gateway_type, Body> async_body_type; + mfn_body_type &body_ref = *this->my_body; + async_body_type ab = *static_cast<async_body_type*>(dynamic_cast< multifunction_body_leaf<input_type, typename base_type::output_ports_type, async_body_type> & >(body_ref).get_body_ptr()); + return ab.get_body(); + } + +protected: + + void reset_node( reset_flags f) override { + base_type::reset_node(f); + } +}; + +#include "detail/_flow_graph_node_set_impl.h" + +template< typename T > +class overwrite_node : public graph_node, public receiver<T>, public sender<T> { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + __TBB_NOINLINE_SYM explicit overwrite_node(graph &g) + : graph_node(g), my_successors(this), my_buffer_is_valid(false) + { + fgt_node( CODEPTR(), FLOW_OVERWRITE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + overwrite_node(const node_set<Args...>& nodes) : overwrite_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor; doesn't take anything from src; default won't work + __TBB_NOINLINE_SYM overwrite_node( const overwrite_node& src ) : overwrite_node(src.my_graph) {} + + ~overwrite_node() {} + + bool register_successor( successor_type &s ) override { + spin_mutex::scoped_lock l( my_mutex ); + if (my_buffer_is_valid && is_graph_active( my_graph )) { + // We have a valid value that must be forwarded immediately. 
+ bool ret = s.try_put( my_buffer ); + if ( ret ) { + // We add the successor that accepted our put + my_successors.register_successor( s ); + } else { + // In case of reservation a race between the moment of reservation and register_successor can appear, + // because failed reserve does not mean that register_successor is not ready to put a message immediately. + // We have some sort of infinite loop: reserving node tries to set pull state for the edge, + // but overwrite_node tries to return push state back. That is why we have to break this loop with task creation. + small_object_allocator allocator{}; + typedef register_predecessor_task task_type; + graph_task* t = allocator.new_object<task_type>(graph_reference(), allocator, *this, s); + graph_reference().reserve_wait(); + spawn_in_graph_arena( my_graph, *t ); + } + } else { + // No valid value yet, just add as successor + my_successors.register_successor( s ); + } + return true; + } + + bool remove_successor( successor_type &s ) override { + spin_mutex::scoped_lock l( my_mutex ); + my_successors.remove_successor(s); + return true; + } + + bool try_get( input_type &v ) override { + spin_mutex::scoped_lock l( my_mutex ); + if ( my_buffer_is_valid ) { + v = my_buffer; + return true; + } + return false; + } + + //! Reserves an item + bool try_reserve( T &v ) override { + return try_get(v); + } + + //! Releases the reserved item + bool try_release() override { return true; } + + //! Consumes the reserved item + bool try_consume() override { return true; } + + bool is_valid() { + spin_mutex::scoped_lock l( my_mutex ); + return my_buffer_is_valid; + } + + void clear() { + spin_mutex::scoped_lock l( my_mutex ); + my_buffer_is_valid = false; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task( const input_type &v ) override { + spin_mutex::scoped_lock l( my_mutex ); + return try_put_task_impl(v); + } + + graph_task * try_put_task_impl(const input_type &v) { + my_buffer = v; + my_buffer_is_valid = true; + graph_task* rtask = my_successors.try_put_task(v); + if (!rtask) rtask = SUCCESSFULLY_ENQUEUED; + return rtask; + } + + graph& graph_reference() const override { + return my_graph; + } + + //! 
Breaks an infinite loop between the node reservation and register_successor call + struct register_predecessor_task : public graph_task { + register_predecessor_task( + graph& g, small_object_allocator& allocator, predecessor_type& owner, successor_type& succ) + : graph_task(g, allocator), o(owner), s(succ) {}; + + task* execute(execution_data& ed) override { + // TODO revamp: investigate why qualification is needed for register_successor() call + using tbb::detail::d1::register_predecessor; + using tbb::detail::d1::register_successor; + if ( !register_predecessor(s, o) ) { + register_successor(o, s); + } + finalize(ed); + return nullptr; + } + + predecessor_type& o; + successor_type& s; + }; + + spin_mutex my_mutex; + broadcast_cache< input_type, null_rw_mutex > my_successors; + input_type my_buffer; + bool my_buffer_is_valid; + + void reset_node( reset_flags f) override { + my_buffer_is_valid = false; + if (f&rf_clear_edges) { + my_successors.clear(); + } + } +}; // overwrite_node + +template< typename T > +class write_once_node : public overwrite_node<T> { +public: + typedef T input_type; + typedef T output_type; + typedef overwrite_node<T> base_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit write_once_node(graph& g) : base_type(g) { + fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + write_once_node(const node_set<Args...>& nodes) : write_once_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor: call base class copy constructor + __TBB_NOINLINE_SYM write_once_node( const write_once_node& src ) : base_type(src) { + fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task *try_put_task( const T &v ) override { + spin_mutex::scoped_lock l( this->my_mutex ); + return this->my_buffer_is_valid ? 
NULL : this->try_put_task_impl(v); + } +}; // write_once_node + +inline void set_name(const graph& g, const char *name) { + fgt_graph_desc(&g, name); +} + +template <typename Output> +inline void set_name(const input_node<Output>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename Input, typename Output, typename Policy> +inline void set_name(const function_node<Input, Output, Policy>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename Output, typename Policy> +inline void set_name(const continue_node<Output,Policy>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const broadcast_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const buffer_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const queue_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const sequencer_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T, typename Compare> +inline void set_name(const priority_queue_node<T, Compare>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T, typename DecrementType> +inline void set_name(const limiter_node<T, DecrementType>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename OutputTuple, typename JP> +inline void set_name(const join_node<OutputTuple, JP>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename... Types> +inline void set_name(const indexer_node<Types...>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const overwrite_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const write_once_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template<typename Input, typename Output, typename Policy> +inline void set_name(const multifunction_node<Input, Output, Policy>& node, const char *name) { + fgt_multioutput_node_desc(&node, name); +} + +template<typename TupleType> +inline void set_name(const split_node<TupleType>& node, const char *name) { + fgt_multioutput_node_desc(&node, name); +} + +template< typename InputTuple, typename OutputTuple > +inline void set_name(const composite_node<InputTuple, OutputTuple>& node, const char *name) { + fgt_multiinput_multioutput_node_desc(&node, name); +} + +template<typename Input, typename Output, typename Policy> +inline void set_name(const async_node<Input, Output, Policy>& node, const char *name) +{ + fgt_multioutput_node_desc(&node, name); +} +} // d1 +} // detail +} // tbb + + +// Include deduction guides for node classes +#include "detail/_flow_graph_nodes_deduction.h" + +namespace tbb { +namespace flow { +inline namespace v1 { + using detail::d1::receiver; + using detail::d1::sender; + + using detail::d1::serial; + using detail::d1::unlimited; + + using detail::d1::reset_flags; + using detail::d1::rf_reset_protocol; + using detail::d1::rf_reset_bodies; + using detail::d1::rf_clear_edges; + + using detail::d1::graph; + using detail::d1::graph_node; + using detail::d1::continue_msg; + + using detail::d1::input_node; + using detail::d1::function_node; + using detail::d1::multifunction_node; + using detail::d1::split_node; + using detail::d1::output_port; + 
using detail::d1::indexer_node; + using detail::d1::tagged_msg; + using detail::d1::cast_to; + using detail::d1::is_a; + using detail::d1::continue_node; + using detail::d1::overwrite_node; + using detail::d1::write_once_node; + using detail::d1::broadcast_node; + using detail::d1::buffer_node; + using detail::d1::queue_node; + using detail::d1::sequencer_node; + using detail::d1::priority_queue_node; + using detail::d1::limiter_node; + using namespace detail::d1::graph_policy_namespace; + using detail::d1::join_node; + using detail::d1::input_port; + using detail::d1::copy_body; + using detail::d1::make_edge; + using detail::d1::remove_edge; + using detail::d1::tag_value; + using detail::d1::composite_node; + using detail::d1::async_node; + using detail::d1::node_priority_t; + using detail::d1::no_priority; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + using detail::d1::follows; + using detail::d1::precedes; + using detail::d1::make_node_set; + using detail::d1::make_edges; +#endif + +} // v1 +} // flow + + using detail::d1::flow_control; + +namespace profiling { + using detail::d1::set_name; +} // profiling + +} // tbb + + +#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) + // We don't do pragma pop here, since it still gives warning on the USER side + #undef __TBB_NOINLINE_SYM +#endif + +#endif // __TBB_flow_graph_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h b/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h index 121f167c4d..6ab5f7dbaf 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h +++ b/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h @@ -1,51 +1,51 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_flow_graph_abstractions_H -#define __TBB_flow_graph_abstractions_H - -namespace tbb { -namespace detail { -namespace d1 { - -//! Pure virtual template classes that define interfaces for async communication -class graph_proxy { -public: - //! Inform a graph that messages may come from outside, to prevent premature graph completion - virtual void reserve_wait() = 0; - - //! Inform a graph that a previous call to reserve_wait is no longer in effect - virtual void release_wait() = 0; - - virtual ~graph_proxy() {} -}; - -template <typename Input> -class receiver_gateway : public graph_proxy { -public: - //! Type of inputing data into FG. - typedef Input input_type; - - //! Submit signal from an asynchronous activity to FG. - virtual bool try_put(const input_type&) = 0; -}; - -} // d1 - - -} // detail -} // tbb -#endif +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_abstractions_H +#define __TBB_flow_graph_abstractions_H + +namespace tbb { +namespace detail { +namespace d1 { + +//! Pure virtual template classes that define interfaces for async communication +class graph_proxy { +public: + //! Inform a graph that messages may come from outside, to prevent premature graph completion + virtual void reserve_wait() = 0; + + //! Inform a graph that a previous call to reserve_wait is no longer in effect + virtual void release_wait() = 0; + + virtual ~graph_proxy() {} +}; + +template <typename Input> +class receiver_gateway : public graph_proxy { +public: + //! Type of inputing data into FG. + typedef Input input_type; + + //! Submit signal from an asynchronous activity to FG. + virtual bool try_put(const input_type&) = 0; +}; + +} // d1 + + +} // detail +} // tbb +#endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/global_control.h b/contrib/libs/tbb/include/oneapi/tbb/global_control.h index 80177b6b82..ee31fe23c0 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/global_control.h +++ b/contrib/libs/tbb/include/oneapi/tbb/global_control.h @@ -1,188 +1,188 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_global_control_H -#define __TBB_global_control_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_assert.h" -#include "detail/_template_helpers.h" -#include "detail/_exception.h" - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -#include <new> // std::nothrow_t -#endif -#include <cstddef> - -namespace tbb { -namespace detail { - -namespace d1 { -class global_control; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -class task_scheduler_handle; -#endif -} - -namespace r1 { -void __TBB_EXPORTED_FUNC create(d1::global_control&); -void __TBB_EXPORTED_FUNC destroy(d1::global_control&); -std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int); -struct global_control_impl; -struct control_storage_comparator; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -void release_impl(d1::task_scheduler_handle& handle); -bool finalize_impl(d1::task_scheduler_handle& handle); -void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&); -bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode); -#endif -} - -namespace d1 { - -class global_control { -public: - enum parameter { - max_allowed_parallelism, - thread_stack_size, - terminate_on_exception, -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - scheduler_handle, // not a public parameter -#else - reserved1, // not a public parameter -#endif - parameter_max // insert new parameters above this point - }; - - global_control(parameter p, std::size_t value) : - my_value(value), my_reserved(), my_param(p) { - suppress_unused_warning(my_reserved); - __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) - // For Windows 8 Store* apps it's impossible to set stack size - if (p==thread_stack_size) - return; -#elif __TBB_x86_64 && (_WIN32 || _WIN64) - if (p==thread_stack_size) - __TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range"); -#endif - if (my_param==max_allowed_parallelism) - __TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0."); - r1::create(*this); - } - - ~global_control() { - __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) - // For Windows 8 Store* apps it's impossible to set stack size - if (my_param==thread_stack_size) - return; -#endif - r1::destroy(*this); - } - - static std::size_t active_value(parameter p) { - __TBB_ASSERT(p < parameter_max, "Invalid parameter"); - return r1::global_control_active_value((int)p); - } - -private: - std::size_t my_value; - std::intptr_t my_reserved; // TODO: substitution of global_control* not to break backward compatibility - parameter my_param; - - friend struct r1::global_control_impl; - friend struct r1::control_storage_comparator; -}; - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -//! Finalization options. -//! Outside of the class to avoid extensive friendship. -static constexpr std::intptr_t release_nothrowing = 0; -static constexpr std::intptr_t finalize_nothrowing = 1; -static constexpr std::intptr_t finalize_throwing = 2; - -//! User side wrapper for a task scheduler lifetime control object -class task_scheduler_handle { -public: - task_scheduler_handle() = default; - ~task_scheduler_handle() { - release(*this); - } - - //! No copy - task_scheduler_handle(const task_scheduler_handle& other) = delete; - task_scheduler_handle& operator=(const task_scheduler_handle& other) = delete; - - //! 
Move only - task_scheduler_handle(task_scheduler_handle&& other) noexcept : m_ctl{nullptr} { - std::swap(m_ctl, other.m_ctl); - } - task_scheduler_handle& operator=(task_scheduler_handle&& other) noexcept { - std::swap(m_ctl, other.m_ctl); - return *this; - }; - - //! Get and active instance of task_scheduler_handle - static task_scheduler_handle get() { - task_scheduler_handle handle; - r1::get(handle); - return handle; - } - - //! Release the reference and deactivate handle - static void release(task_scheduler_handle& handle) { - if (handle.m_ctl != nullptr) { - r1::finalize(handle, release_nothrowing); - } - } - -private: - friend void r1::release_impl(task_scheduler_handle& handle); - friend bool r1::finalize_impl(task_scheduler_handle& handle); - friend void __TBB_EXPORTED_FUNC r1::get(task_scheduler_handle&); - - global_control* m_ctl{nullptr}; -}; - -#if TBB_USE_EXCEPTIONS -//! Waits for worker threads termination. Throws exception on error. -inline void finalize(task_scheduler_handle& handle) { - r1::finalize(handle, finalize_throwing); -} -#endif -//! Waits for worker threads termination. Returns false on error. -inline bool finalize(task_scheduler_handle& handle, const std::nothrow_t&) noexcept { - return r1::finalize(handle, finalize_nothrowing); -} -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::global_control; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -using detail::d1::finalize; -using detail::d1::task_scheduler_handle; -using detail::r1::unsafe_wait; -#endif -} // namespace v1 - -} // namespace tbb - -#endif // __TBB_global_control_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_global_control_H +#define __TBB_global_control_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_template_helpers.h" +#include "detail/_exception.h" + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +#include <new> // std::nothrow_t +#endif +#include <cstddef> + +namespace tbb { +namespace detail { + +namespace d1 { +class global_control; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +class task_scheduler_handle; +#endif +} + +namespace r1 { +void __TBB_EXPORTED_FUNC create(d1::global_control&); +void __TBB_EXPORTED_FUNC destroy(d1::global_control&); +std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int); +struct global_control_impl; +struct control_storage_comparator; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +void release_impl(d1::task_scheduler_handle& handle); +bool finalize_impl(d1::task_scheduler_handle& handle); +void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&); +bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode); +#endif +} + +namespace d1 { + +class global_control { +public: + enum parameter { + max_allowed_parallelism, + thread_stack_size, + terminate_on_exception, +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + scheduler_handle, // not a public parameter +#else + reserved1, // not a public parameter +#endif + parameter_max // insert new parameters above this point + }; + + global_control(parameter p, std::size_t value) : + my_value(value), my_reserved(), my_param(p) { + suppress_unused_warning(my_reserved); + __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + // For Windows 8 Store* apps it's impossible to set stack size + if (p==thread_stack_size) + return; +#elif __TBB_x86_64 && (_WIN32 || _WIN64) + if (p==thread_stack_size) + __TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range"); +#endif + if (my_param==max_allowed_parallelism) + __TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0."); + r1::create(*this); + } + + ~global_control() { + __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + // For Windows 8 Store* apps it's impossible to set stack size + if (my_param==thread_stack_size) + return; +#endif + r1::destroy(*this); + } + + static std::size_t active_value(parameter p) { + __TBB_ASSERT(p < parameter_max, "Invalid parameter"); + return r1::global_control_active_value((int)p); + } + +private: + std::size_t my_value; + std::intptr_t my_reserved; // TODO: substitution of global_control* not to break backward compatibility + parameter my_param; + + friend struct r1::global_control_impl; + friend struct r1::control_storage_comparator; +}; + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +//! Finalization options. +//! Outside of the class to avoid extensive friendship. +static constexpr std::intptr_t release_nothrowing = 0; +static constexpr std::intptr_t finalize_nothrowing = 1; +static constexpr std::intptr_t finalize_throwing = 2; + +//! User side wrapper for a task scheduler lifetime control object +class task_scheduler_handle { +public: + task_scheduler_handle() = default; + ~task_scheduler_handle() { + release(*this); + } + + //! No copy + task_scheduler_handle(const task_scheduler_handle& other) = delete; + task_scheduler_handle& operator=(const task_scheduler_handle& other) = delete; + + //! 
Move only + task_scheduler_handle(task_scheduler_handle&& other) noexcept : m_ctl{nullptr} { + std::swap(m_ctl, other.m_ctl); + } + task_scheduler_handle& operator=(task_scheduler_handle&& other) noexcept { + std::swap(m_ctl, other.m_ctl); + return *this; + }; + + //! Get and active instance of task_scheduler_handle + static task_scheduler_handle get() { + task_scheduler_handle handle; + r1::get(handle); + return handle; + } + + //! Release the reference and deactivate handle + static void release(task_scheduler_handle& handle) { + if (handle.m_ctl != nullptr) { + r1::finalize(handle, release_nothrowing); + } + } + +private: + friend void r1::release_impl(task_scheduler_handle& handle); + friend bool r1::finalize_impl(task_scheduler_handle& handle); + friend void __TBB_EXPORTED_FUNC r1::get(task_scheduler_handle&); + + global_control* m_ctl{nullptr}; +}; + +#if TBB_USE_EXCEPTIONS +//! Waits for worker threads termination. Throws exception on error. +inline void finalize(task_scheduler_handle& handle) { + r1::finalize(handle, finalize_throwing); +} +#endif +//! Waits for worker threads termination. Returns false on error. +inline bool finalize(task_scheduler_handle& handle, const std::nothrow_t&) noexcept { + return r1::finalize(handle, finalize_nothrowing); +} +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::global_control; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +using detail::d1::finalize; +using detail::d1::task_scheduler_handle; +using detail::r1::unsafe_wait; +#endif +} // namespace v1 + +} // namespace tbb + +#endif // __TBB_global_control_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/info.h b/contrib/libs/tbb/include/oneapi/tbb/info.h index 21475a4d00..f08a7a8a9e 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/info.h +++ b/contrib/libs/tbb/include/oneapi/tbb/info.h @@ -1,137 +1,137 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_info_H -#define __TBB_info_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -#if __TBB_ARENA_BINDING -#include <vector> - -namespace tbb { -namespace detail { - -namespace d1{ - -using numa_node_id = int; -using core_type_id = int; - -// TODO: consider version approach to resolve backward compatibility potential issues. 
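Editorial note, not part of the diff: the global_control and task_scheduler_handle interface restored in the hunk above is typically used as in the minimal sketch below. The <oneapi/tbb/...> include layout, the limit of 4 threads, and the printed message are illustrative assumptions; the task_scheduler_handle part exists only when the library is built with __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE, so it is shown commented out.

// Minimal usage sketch for the global_control API shown above.
#include <oneapi/tbb/global_control.h>
#include <oneapi/tbb/parallel_for.h>
#include <cstddef>
#include <cstdio>

int main() {
    // While this object is alive, TBB will not use more than 4 threads.
    tbb::global_control limit(tbb::global_control::max_allowed_parallelism, 4);

    // active_value() reports the value currently in effect for a parameter.
    std::size_t active =
        tbb::global_control::active_value(tbb::global_control::max_allowed_parallelism);
    std::printf("max_allowed_parallelism is now %zu\n", active);

    tbb::parallel_for(0, 1000, [](int) { /* some work */ });

    // When worker-waiting support is compiled in, shutdown can be made deterministic:
    //   auto handle = tbb::task_scheduler_handle::get();
    //   ... run parallel work ...
    //   tbb::finalize(handle, std::nothrow);  // waits for workers; returns false on error
    return 0;
}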
-struct constraints { -#if !__TBB_CPP20_PRESENT - constraints(numa_node_id id = -1, int maximal_concurrency = -1) - : numa_id(id) - , max_concurrency(maximal_concurrency) -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - , core_type(-1) - , max_threads_per_core(-1) -#endif - {} -#endif /*!__TBB_CPP20_PRESENT*/ - - constraints& set_numa_id(numa_node_id id) { - numa_id = id; - return *this; - } - constraints& set_max_concurrency(int maximal_concurrency) { - max_concurrency = maximal_concurrency; - return *this; - } -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - constraints& set_core_type(core_type_id id) { - core_type = id; - return *this; - } - constraints& set_max_threads_per_core(int threads_number) { - max_threads_per_core = threads_number; - return *this; - } -#endif - - numa_node_id numa_id = -1; - int max_concurrency = -1; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - core_type_id core_type = -1; - int max_threads_per_core = -1; -#endif -}; - -} // namespace d1 - -namespace r1 { -unsigned __TBB_EXPORTED_FUNC numa_node_count(); -void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array); -int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id); - -// Reserved fields are required to save binary backward compatibility in case of future changes. -// They must be defined to 0 at this moment. -unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0); -void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0); - -int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0); -int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0); -} // namespace r1 - -namespace d1 { - -inline std::vector<numa_node_id> numa_nodes() { - std::vector<numa_node_id> node_indices(r1::numa_node_count()); - r1::fill_numa_indices(node_indices.data()); - return node_indices; -} - -inline int default_concurrency(numa_node_id id = -1) { - return r1::numa_default_concurrency(id); -} - -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT -inline std::vector<core_type_id> core_types() { - std::vector<int> core_type_indexes(r1::core_type_count()); - r1::fill_core_type_indices(core_type_indexes.data()); - return core_type_indexes; -} - -inline int default_concurrency(constraints c) { - return r1::constraints_default_concurrency(c); -} -#endif /*__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::numa_node_id; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT -using detail::d1::core_type_id; -#endif - -namespace info { -using detail::d1::numa_nodes; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT -using detail::d1::core_types; -#endif - -using detail::d1::default_concurrency; -} // namespace info -} // namespace v1 - -} // namespace tbb - -#endif /*__TBB_ARENA_BINDING*/ - -#endif /*__TBB_info_H*/ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_info_H +#define __TBB_info_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#if __TBB_ARENA_BINDING +#include <vector> + +namespace tbb { +namespace detail { + +namespace d1{ + +using numa_node_id = int; +using core_type_id = int; + +// TODO: consider version approach to resolve backward compatibility potential issues. +struct constraints { +#if !__TBB_CPP20_PRESENT + constraints(numa_node_id id = -1, int maximal_concurrency = -1) + : numa_id(id) + , max_concurrency(maximal_concurrency) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + , core_type(-1) + , max_threads_per_core(-1) +#endif + {} +#endif /*!__TBB_CPP20_PRESENT*/ + + constraints& set_numa_id(numa_node_id id) { + numa_id = id; + return *this; + } + constraints& set_max_concurrency(int maximal_concurrency) { + max_concurrency = maximal_concurrency; + return *this; + } +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + constraints& set_core_type(core_type_id id) { + core_type = id; + return *this; + } + constraints& set_max_threads_per_core(int threads_number) { + max_threads_per_core = threads_number; + return *this; + } +#endif + + numa_node_id numa_id = -1; + int max_concurrency = -1; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + core_type_id core_type = -1; + int max_threads_per_core = -1; +#endif +}; + +} // namespace d1 + +namespace r1 { +unsigned __TBB_EXPORTED_FUNC numa_node_count(); +void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array); +int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id); + +// Reserved fields are required to save binary backward compatibility in case of future changes. +// They must be defined to 0 at this moment. 
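Editorial note, not part of the diff: the info.h interface in this hunk (tbb::info::numa_nodes, tbb::info::default_concurrency, and the fluent constraints builder) is commonly combined with tbb::task_arena as sketched below. The arena-binding part assumes a build with __TBB_ARENA_BINDING enabled, and the include paths are only one possible layout.

// Sketch: enumerate NUMA nodes and pin an arena to the first one.
#include <oneapi/tbb/info.h>
#include <oneapi/tbb/task_arena.h>
#include <cstdio>
#include <vector>

int main() {
    std::vector<tbb::numa_node_id> nodes = tbb::info::numa_nodes();
    for (tbb::numa_node_id id : nodes) {
        std::printf("NUMA node %d: default concurrency %d\n",
                    id, tbb::info::default_concurrency(id));
    }

    if (!nodes.empty()) {
        // constraints is a fluent builder; task_arena accepts it to restrict placement.
        tbb::task_arena numa_arena(
            tbb::task_arena::constraints{}.set_numa_id(nodes.front()));
        numa_arena.execute([] { /* work executed by threads bound to that node */ });
    }
    return 0;
}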
+unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0); +void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0); + +int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0); +int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0); +} // namespace r1 + +namespace d1 { + +inline std::vector<numa_node_id> numa_nodes() { + std::vector<numa_node_id> node_indices(r1::numa_node_count()); + r1::fill_numa_indices(node_indices.data()); + return node_indices; +} + +inline int default_concurrency(numa_node_id id = -1) { + return r1::numa_default_concurrency(id); +} + +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +inline std::vector<core_type_id> core_types() { + std::vector<int> core_type_indexes(r1::core_type_count()); + r1::fill_core_type_indices(core_type_indexes.data()); + return core_type_indexes; +} + +inline int default_concurrency(constraints c) { + return r1::constraints_default_concurrency(c); +} +#endif /*__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::numa_node_id; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +using detail::d1::core_type_id; +#endif + +namespace info { +using detail::d1::numa_nodes; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +using detail::d1::core_types; +#endif + +using detail::d1::default_concurrency; +} // namespace info +} // namespace v1 + +} // namespace tbb + +#endif /*__TBB_ARENA_BINDING*/ + +#endif /*__TBB_info_H*/ diff --git a/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h b/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h index 6e913c6713..667d70103f 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h +++ b/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h @@ -1,272 +1,272 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_memory_pool_H -#define __TBB_memory_pool_H - -#if !TBB_PREVIEW_MEMORY_POOL -#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h -#endif -/** @file */ - -#include "scalable_allocator.h" - -#include <new> // std::bad_alloc -#include <stdexcept> // std::runtime_error, std::invalid_argument -#include <utility> // std::forward - - -#if __TBB_EXTRA_DEBUG -#define __TBBMALLOC_ASSERT ASSERT -#else -#define __TBBMALLOC_ASSERT(a,b) ((void)0) -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -//! Base of thread-safe pool allocator for variable-size requests -class pool_base : no_copy { - // Pool interface is separate from standard allocator classes because it has - // to maintain internal state, no copy or assignment. Move and swap are possible. -public: - //! Reset pool to reuse its memory (free all objects at once) - void recycle() { rml::pool_reset(my_pool); } - - //! 
The "malloc" analogue to allocate block of memory of size bytes - void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); } - - //! The "free" analogue to discard a previously allocated piece of memory. - void free(void* ptr) { rml::pool_free(my_pool, ptr); } - - //! The "realloc" analogue complementing pool_malloc. - // Enables some low-level optimization possibilities - void *realloc(void* ptr, size_t size) { - return rml::pool_realloc(my_pool, ptr, size); - } - -protected: - //! destroy pool - must be called in a child class - void destroy() { rml::pool_destroy(my_pool); } - - rml::MemoryPool *my_pool; -}; - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Workaround for erroneous "unreferenced parameter" warning in method destroy. - #pragma warning (push) - #pragma warning (disable: 4100) -#endif - -//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 -/** @ingroup memory_allocation */ -template<typename T, typename P = pool_base> -class memory_pool_allocator { -protected: - typedef P pool_type; - pool_type *my_pool; - template<typename U, typename R> - friend class memory_pool_allocator; - template<typename V, typename U, typename R> - friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); - template<typename V, typename U, typename R> - friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); -public: - typedef T value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - template<typename U> struct rebind { - typedef memory_pool_allocator<U, P> other; - }; - - explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {} - memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} - template<typename U> - memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - //! Allocate space for n objects. - pointer allocate( size_type n, const void* /*hint*/ = 0) { - pointer p = static_cast<pointer>( my_pool->malloc( n*sizeof(value_type) ) ); - if (!p) - throw_exception(std::bad_alloc()); - return p; - } - //! Free previously allocated block of memory. - void deallocate( pointer p, size_type ) { - my_pool->free(p); - } - //! Largest value for which method allocate might succeed. - size_type max_size() const throw() { - size_type max = static_cast<size_type>(-1) / sizeof (value_type); - return (max > 0 ? max : 1); - } - //! Copy-construct value at location pointed to by p. - - template<typename U, typename... Args> - void construct(U *p, Args&&... args) - { ::new((void *)p) U(std::forward<Args>(args)...); } - - //! Destroy value at location pointed to by p. - void destroy( pointer p ) { p->~value_type(); } - -}; - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4100 is back - -//! 
Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1 -/** @ingroup memory_allocation */ -template<typename P> -class memory_pool_allocator<void, P> { -public: - typedef P pool_type; - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; - template<typename U> struct rebind { - typedef memory_pool_allocator<U, P> other; - }; - - explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {} - memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} - template<typename U> - memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} - -protected: - pool_type *my_pool; - template<typename U, typename R> - friend class memory_pool_allocator; - template<typename V, typename U, typename R> - friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); - template<typename V, typename U, typename R> - friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); -}; - -template<typename T, typename U, typename P> -inline bool operator==( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool==b.my_pool;} - -template<typename T, typename U, typename P> -inline bool operator!=( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool!=b.my_pool;} - -//! Thread-safe growable pool allocator for variable-size requests -template <typename Alloc> -class memory_pool : public pool_base { - Alloc my_alloc; // TODO: base-class optimization - static void *allocate_request(intptr_t pool_id, size_t & bytes); - static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes); - -public: - //! construct pool with underlying allocator - explicit memory_pool(const Alloc &src = Alloc()); - - //! destroy pool - ~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc latter -}; - -class fixed_pool : public pool_base { - void *my_buffer; - size_t my_size; - inline static void *allocate_request(intptr_t pool_id, size_t & bytes); - -public: - //! construct pool with underlying allocator - inline fixed_pool(void *buf, size_t size); - //! destroy pool - ~fixed_pool() { destroy(); } -}; - -//////////////// Implementation /////////////// - -template <typename Alloc> -memory_pool<Alloc>::memory_pool(const Alloc &src) : my_alloc(src) { - rml::MemPoolPolicy args(allocate_request, deallocate_request, - sizeof(typename Alloc::value_type)); - rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); - if (res!=rml::POOL_OK) - throw_exception(std::runtime_error("Can't create pool")); -} -template <typename Alloc> -void *memory_pool<Alloc>::allocate_request(intptr_t pool_id, size_t & bytes) { - memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); - const size_t unit_size = sizeof(typename Alloc::value_type); - __TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL); - void *ptr; -#if TBB_USE_EXCEPTIONS - try { -#endif - ptr = self.my_alloc.allocate( bytes/unit_size ); -#if TBB_USE_EXCEPTIONS - } catch(...) { - return 0; - } -#endif - return ptr; -} -#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED - // Workaround for erroneous "unreachable code" warning in the template below. 
- // Specific for VC++ 17-18 compiler - #pragma warning (push) - #pragma warning (disable: 4702) -#endif -template <typename Alloc> -int memory_pool<Alloc>::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) { - memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); - const size_t unit_size = sizeof(typename Alloc::value_type); - __TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL); - self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size ); - return 0; -} -#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED - #pragma warning (pop) -#endif -inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) { - if (!buf || !size) - // TODO: improve support for mode with exceptions disabled - throw_exception(std::invalid_argument("Zero in parameter is invalid")); - rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true); - rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); - if (res!=rml::POOL_OK) - throw_exception(std::runtime_error("Can't create pool")); -} -inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) { - fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id); - __TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice."); - bytes = self.my_size; - self.my_size = 0; // remember that buffer has been used - return self.my_buffer; -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::memory_pool_allocator; -using detail::d1::memory_pool; -using detail::d1::fixed_pool; -} // inline namepspace v1 -} // namespace tbb - -#undef __TBBMALLOC_ASSERT -#endif// __TBB_memory_pool_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_memory_pool_H +#define __TBB_memory_pool_H + +#if !TBB_PREVIEW_MEMORY_POOL +#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h +#endif +/** @file */ + +#include "scalable_allocator.h" + +#include <new> // std::bad_alloc +#include <stdexcept> // std::runtime_error, std::invalid_argument +#include <utility> // std::forward + + +#if __TBB_EXTRA_DEBUG +#define __TBBMALLOC_ASSERT ASSERT +#else +#define __TBBMALLOC_ASSERT(a,b) ((void)0) +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +//! Base of thread-safe pool allocator for variable-size requests +class pool_base : no_copy { + // Pool interface is separate from standard allocator classes because it has + // to maintain internal state, no copy or assignment. Move and swap are possible. +public: + //! Reset pool to reuse its memory (free all objects at once) + void recycle() { rml::pool_reset(my_pool); } + + //! The "malloc" analogue to allocate block of memory of size bytes + void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); } + + //! The "free" analogue to discard a previously allocated piece of memory. + void free(void* ptr) { rml::pool_free(my_pool, ptr); } + + //! The "realloc" analogue complementing pool_malloc. 
+ // Enables some low-level optimization possibilities + void *realloc(void* ptr, size_t size) { + return rml::pool_realloc(my_pool, ptr, size); + } + +protected: + //! destroy pool - must be called in a child class + void destroy() { rml::pool_destroy(my_pool); } + + rml::MemoryPool *my_pool; +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Workaround for erroneous "unreferenced parameter" warning in method destroy. + #pragma warning (push) + #pragma warning (disable: 4100) +#endif + +//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 +/** @ingroup memory_allocation */ +template<typename T, typename P = pool_base> +class memory_pool_allocator { +protected: + typedef P pool_type; + pool_type *my_pool; + template<typename U, typename R> + friend class memory_pool_allocator; + template<typename V, typename U, typename R> + friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); + template<typename V, typename U, typename R> + friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); +public: + typedef T value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + template<typename U> struct rebind { + typedef memory_pool_allocator<U, P> other; + }; + + explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {} + memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} + template<typename U> + memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + //! Allocate space for n objects. + pointer allocate( size_type n, const void* /*hint*/ = 0) { + pointer p = static_cast<pointer>( my_pool->malloc( n*sizeof(value_type) ) ); + if (!p) + throw_exception(std::bad_alloc()); + return p; + } + //! Free previously allocated block of memory. + void deallocate( pointer p, size_type ) { + my_pool->free(p); + } + //! Largest value for which method allocate might succeed. + size_type max_size() const throw() { + size_type max = static_cast<size_type>(-1) / sizeof (value_type); + return (max > 0 ? max : 1); + } + //! Copy-construct value at location pointed to by p. + + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new((void *)p) U(std::forward<Args>(args)...); } + + //! Destroy value at location pointed to by p. + void destroy( pointer p ) { p->~value_type(); } + +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) +#endif // warning 4100 is back + +//! 
Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1 +/** @ingroup memory_allocation */ +template<typename P> +class memory_pool_allocator<void, P> { +public: + typedef P pool_type; + typedef void* pointer; + typedef const void* const_pointer; + typedef void value_type; + template<typename U> struct rebind { + typedef memory_pool_allocator<U, P> other; + }; + + explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {} + memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} + template<typename U> + memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} + +protected: + pool_type *my_pool; + template<typename U, typename R> + friend class memory_pool_allocator; + template<typename V, typename U, typename R> + friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); + template<typename V, typename U, typename R> + friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); +}; + +template<typename T, typename U, typename P> +inline bool operator==( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool==b.my_pool;} + +template<typename T, typename U, typename P> +inline bool operator!=( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool!=b.my_pool;} + +//! Thread-safe growable pool allocator for variable-size requests +template <typename Alloc> +class memory_pool : public pool_base { + Alloc my_alloc; // TODO: base-class optimization + static void *allocate_request(intptr_t pool_id, size_t & bytes); + static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes); + +public: + //! construct pool with underlying allocator + explicit memory_pool(const Alloc &src = Alloc()); + + //! destroy pool + ~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc latter +}; + +class fixed_pool : public pool_base { + void *my_buffer; + size_t my_size; + inline static void *allocate_request(intptr_t pool_id, size_t & bytes); + +public: + //! construct pool with underlying allocator + inline fixed_pool(void *buf, size_t size); + //! destroy pool + ~fixed_pool() { destroy(); } +}; + +//////////////// Implementation /////////////// + +template <typename Alloc> +memory_pool<Alloc>::memory_pool(const Alloc &src) : my_alloc(src) { + rml::MemPoolPolicy args(allocate_request, deallocate_request, + sizeof(typename Alloc::value_type)); + rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); + if (res!=rml::POOL_OK) + throw_exception(std::runtime_error("Can't create pool")); +} +template <typename Alloc> +void *memory_pool<Alloc>::allocate_request(intptr_t pool_id, size_t & bytes) { + memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); + const size_t unit_size = sizeof(typename Alloc::value_type); + __TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL); + void *ptr; +#if TBB_USE_EXCEPTIONS + try { +#endif + ptr = self.my_alloc.allocate( bytes/unit_size ); +#if TBB_USE_EXCEPTIONS + } catch(...) { + return 0; + } +#endif + return ptr; +} +#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED + // Workaround for erroneous "unreachable code" warning in the template below. 
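Editorial note, not part of the diff: the preview pool interface in this hunk is typically used as sketched below. TBB_PREVIEW_MEMORY_POOL must be defined before the include; the std::allocator backing, the request sizes, and the one-megabyte buffer are arbitrary choices for illustration.

// Sketch of memory_pool, memory_pool_allocator and fixed_pool usage (preview API).
#define TBB_PREVIEW_MEMORY_POOL 1
#include <oneapi/tbb/memory_pool.h>
#include <list>
#include <memory>   // std::allocator

int main() {
    // Growable pool backed by std::allocator<char>; recycle() releases all objects at once.
    tbb::memory_pool<std::allocator<char>> pool;
    void* raw = pool.malloc(256);    // "malloc" analogue
    raw = pool.realloc(raw, 512);    // "realloc" analogue
    pool.free(raw);                  // "free" analogue

    // The same pool can back a standard container through memory_pool_allocator.
    tbb::memory_pool_allocator<int> alloc(pool);
    std::list<int, tbb::memory_pool_allocator<int>> numbers(alloc);
    numbers.push_back(42);
    numbers.clear();
    pool.recycle();                  // free everything the pool handed out

    // fixed_pool serves all requests from a single user-supplied buffer.
    static char buffer[1024 * 1024];
    tbb::fixed_pool fpool(buffer, sizeof(buffer));
    void* p = fpool.malloc(64);
    fpool.free(p);
    return 0;
}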
+ // Specific for VC++ 17-18 compiler + #pragma warning (push) + #pragma warning (disable: 4702) +#endif +template <typename Alloc> +int memory_pool<Alloc>::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) { + memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); + const size_t unit_size = sizeof(typename Alloc::value_type); + __TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL); + self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size ); + return 0; +} +#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED + #pragma warning (pop) +#endif +inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) { + if (!buf || !size) + // TODO: improve support for mode with exceptions disabled + throw_exception(std::invalid_argument("Zero in parameter is invalid")); + rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true); + rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); + if (res!=rml::POOL_OK) + throw_exception(std::runtime_error("Can't create pool")); +} +inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) { + fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id); + __TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice."); + bytes = self.my_size; + self.my_size = 0; // remember that buffer has been used + return self.my_buffer; +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::memory_pool_allocator; +using detail::d1::memory_pool; +using detail::d1::fixed_pool; +} // inline namepspace v1 +} // namespace tbb + +#undef __TBBMALLOC_ASSERT +#endif// __TBB_memory_pool_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h index 8fab863db3..d0e9e3acbb 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h @@ -1,79 +1,79 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_null_mutex_H -#define __TBB_null_mutex_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! A mutex which does nothing -/** A null_mutex does no operation and simulates success. - @ingroup synchronization */ -class null_mutex { -public: - //! Constructors - constexpr null_mutex() noexcept = default; - - //! Destructor - ~null_mutex() = default; - - //! No Copy - null_mutex(const null_mutex&) = delete; - null_mutex& operator=(const null_mutex&) = delete; - - //! Represents acquisition of a mutex. - class scoped_lock { - public: - //! Constructors - constexpr scoped_lock() noexcept = default; - scoped_lock(null_mutex&) {} - - //! Destructor - ~scoped_lock() = default; - - //! 
No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - void acquire(null_mutex&) {} - bool try_acquire(null_mutex&) { return true; } - void release() {} - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = false; - static constexpr bool is_recursive_mutex = true; - static constexpr bool is_fair_mutex = true; - - void lock() {} - bool try_lock() { return true; } - void unlock() {} -}; // class null_mutex - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::null_mutex; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_null_mutex_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_null_mutex_H +#define __TBB_null_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A mutex which does nothing +/** A null_mutex does no operation and simulates success. + @ingroup synchronization */ +class null_mutex { +public: + //! Constructors + constexpr null_mutex() noexcept = default; + + //! Destructor + ~null_mutex() = default; + + //! No Copy + null_mutex(const null_mutex&) = delete; + null_mutex& operator=(const null_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + //! Constructors + constexpr scoped_lock() noexcept = default; + scoped_lock(null_mutex&) {} + + //! Destructor + ~scoped_lock() = default; + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + void acquire(null_mutex&) {} + bool try_acquire(null_mutex&) { return true; } + void release() {} + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = true; + static constexpr bool is_fair_mutex = true; + + void lock() {} + bool try_lock() { return true; } + void unlock() {} +}; // class null_mutex + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::null_mutex; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_null_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h index 8046bc405d..9d0f8da2a1 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h @@ -1,84 +1,84 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_null_rw_mutex_H -#define __TBB_null_rw_mutex_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! A rw mutex which does nothing -/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation. - @ingroup synchronization */ -class null_rw_mutex { -public: - //! Constructors - constexpr null_rw_mutex() noexcept = default; - - //! Destructor - ~null_rw_mutex() = default; - - //! No Copy - null_rw_mutex(const null_rw_mutex&) = delete; - null_rw_mutex& operator=(const null_rw_mutex&) = delete; - - //! Represents acquisition of a mutex. - class scoped_lock { - public: - //! Constructors - constexpr scoped_lock() noexcept = default; - scoped_lock(null_rw_mutex&, bool = true) {} - - //! Destructor - ~scoped_lock() = default; - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - void acquire(null_rw_mutex&, bool = true) {} - bool try_acquire(null_rw_mutex&, bool = true) { return true; } - void release() {} - bool upgrade_to_writer() { return true; } - bool downgrade_to_reader() { return true; } - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = true; - static constexpr bool is_recursive_mutex = true; - static constexpr bool is_fair_mutex = true; - - void lock() {} - bool try_lock() { return true; } - void unlock() {} - void lock_shared() {} - bool try_lock_shared() { return true; } - void unlock_shared() {} -}; // class null_rw_mutex - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::null_rw_mutex; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_null_rw_mutex_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_null_rw_mutex_H +#define __TBB_null_rw_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A rw mutex which does nothing +/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation. + @ingroup synchronization */ +class null_rw_mutex { +public: + //! Constructors + constexpr null_rw_mutex() noexcept = default; + + //! Destructor + ~null_rw_mutex() = default; + + //! No Copy + null_rw_mutex(const null_rw_mutex&) = delete; + null_rw_mutex& operator=(const null_rw_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + //! Constructors + constexpr scoped_lock() noexcept = default; + scoped_lock(null_rw_mutex&, bool = true) {} + + //! Destructor + ~scoped_lock() = default; + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + void acquire(null_rw_mutex&, bool = true) {} + bool try_acquire(null_rw_mutex&, bool = true) { return true; } + void release() {} + bool upgrade_to_writer() { return true; } + bool downgrade_to_reader() { return true; } + }; + + //! 
Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = true; + static constexpr bool is_fair_mutex = true; + + void lock() {} + bool try_lock() { return true; } + void unlock() {} + void lock_shared() {} + bool try_lock_shared() { return true; } + void unlock_shared() {} +}; // class null_rw_mutex + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::null_rw_mutex; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_null_rw_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h index ed137d4d09..0dc774e078 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h @@ -1,416 +1,416 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_for_H -#define __TBB_parallel_for_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_exception.h" -#include "detail/_task.h" -#include "detail/_small_object_pool.h" -#include "profiling.h" - -#include "partitioner.h" -#include "blocked_range.h" -#include "task_group.h" - -#include <cstddef> -#include <new> - -namespace tbb { -namespace detail { -namespace d1 { - -//! Task type used in parallel_for -/** @ingroup algorithms */ -template<typename Range, typename Body, typename Partitioner> -struct start_for : public task { - Range my_range; - const Body my_body; - node* my_parent; - - typename Partitioner::task_partition_type my_partition; - small_object_allocator my_allocator; - - task* execute(execution_data&) override; - task* cancel(execution_data&) override; - void finalize(const execution_data&); - - //! Constructor for root task. - start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : - my_range(range), - my_body(body), - my_partition(partitioner), - my_allocator(alloc) {} - //! Splitting constructor used to generate children. - /** parent_ becomes left child. Newly constructed object is right child. */ - start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : - my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split_obj), - my_allocator(alloc) {} - //! Construct right child from the given range as response to the demand. - /** parent_ remains left child. Newly constructed object is right child. 
*/ - start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : - my_range(r), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split()), - my_allocator(alloc) - { - my_partition.align_depth( d ); - } - static void run(const Range& range, const Body& body, Partitioner& partitioner) { - task_group_context context(PARALLEL_FOR); - run(range, body, partitioner, context); - } - - static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) { - if ( !range.empty() ) { - small_object_allocator alloc{}; - start_for& for_task = *alloc.new_object<start_for>(range, body, partitioner, alloc); - - // defer creation of the wait node until task allocation succeeds - wait_node wn; - for_task.my_parent = &wn; - execute_and_wait(for_task, context, wn.m_wait, context); - } - } - //! Run body for range, serves as callback for partitioner - void run_body( Range &r ) { - my_body( r ); - } - - //! spawn right task, serves as callback for partitioner - void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { - offer_work_impl(ed, *this, split_obj); - } - - //! spawn right task, serves as callback for partitioner - void offer_work(const Range& r, depth_t d, execution_data& ed) { - offer_work_impl(ed, *this, r, d); - } - -private: - template <typename... Args> - void offer_work_impl(execution_data& ed, Args&&... constructor_args) { - // New right child - small_object_allocator alloc{}; - start_for& right_child = *alloc.new_object<start_for>(ed, std::forward<Args>(constructor_args)..., alloc); - - // New root node as a continuation and ref count. Left and right child attach to the new parent. - right_child.my_parent = my_parent = alloc.new_object<tree_node>(ed, my_parent, 2, alloc); - // Spawn the right sibling - right_child.spawn_self(ed); - } - - void spawn_self(execution_data& ed) { - my_partition.spawn_task(*this, *context(ed)); - } -}; - -//! fold the tree and deallocate the task -template<typename Range, typename Body, typename Partitioner> -void start_for<Range, Body, Partitioner>::finalize(const execution_data& ed) { - // Get the current parent and allocator an object destruction - node* parent = my_parent; - auto allocator = my_allocator; - // Task execution finished - destroy it - this->~start_for(); - // Unwind the tree decrementing the parent`s reference count - - fold_tree<tree_node>(parent, ed); - allocator.deallocate(this, ed); - -} - -//! execute task for parallel_for -template<typename Range, typename Body, typename Partitioner> -task* start_for<Range, Body, Partitioner>::execute(execution_data& ed) { - if (!is_same_affinity(ed)) { - my_partition.note_affinity(execution_slot(ed)); - } - my_partition.check_being_stolen(*this, ed); - my_partition.execute(*this, my_range, ed); - finalize(ed); - return nullptr; -} - -//! cancel task for parallel_for -template<typename Range, typename Body, typename Partitioner> -task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) { - finalize(ed); - return nullptr; -} - -//! 
Calls the function with values from range [begin, end) with a step provided -template<typename Function, typename Index> -class parallel_for_body : detail::no_assign { - const Function &my_func; - const Index my_begin; - const Index my_step; -public: - parallel_for_body( const Function& _func, Index& _begin, Index& _step ) - : my_func(_func), my_begin(_begin), my_step(_step) {} - - void operator()( const blocked_range<Index>& r ) const { - // A set of local variables to help the compiler with vectorization of the following loop. - Index b = r.begin(); - Index e = r.end(); - Index ms = my_step; - Index k = my_begin + b*ms; - -#if __INTEL_COMPILER -#pragma ivdep -#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE -#pragma vector always assert -#endif -#endif - for ( Index i = b; i < e; ++i, k += ms ) { - my_func( k ); - } - } -}; - -// Requirements on Range concept are documented in blocked_range.h - -/** \page parallel_for_body_req Requirements on parallel_for body - Class \c Body implementing the concept of parallel_for body must define: - - \code Body::Body( const Body& ); \endcode Copy constructor - - \code Body::~Body(); \endcode Destructor - - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r. -**/ - -/** \name parallel_for - See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/ -//@{ - -//! Parallel iteration over range with default partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body ) { - start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); -} - -//! Parallel iteration over range with simple partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { - start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner); -} - -//! Parallel iteration over range with auto_partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { - start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner); -} - -//! Parallel iteration over range with static_partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { - start_for<Range,Body,const static_partitioner>::run(range,body,partitioner); -} - -//! Parallel iteration over range with affinity_partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { - start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner); -} - -//! Parallel iteration over range with default partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, task_group_context& context ) { - start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); -} - -//! Parallel iteration over range with simple partitioner and user-supplied context. 
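Editorial note, not part of the diff: the range-based overloads listed above are typically called as in the sketch below; blocked_range over std::size_t, the lambda bodies, and the choice of static_partitioner are illustrative assumptions only.

// Sketch: parallel_for over a blocked_range, with and without an explicit partitioner.
#include <oneapi/tbb/parallel_for.h>
#include <oneapi/tbb/blocked_range.h>
#include <cstddef>
#include <vector>

int main() {
    std::vector<float> data(1000, 1.0f);

    // Default partitioner: the body receives sub-ranges of the blocked_range.
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, data.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                data[i] *= 2.0f;
        });

    // The same call with an explicit partitioner passed as the third argument.
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, data.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                data[i] += 1.0f;
        },
        tbb::static_partitioner{});
    return 0;
}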
-/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { - start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with auto_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { - start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with static_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { - start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with affinity_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { - start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context); -} - -//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner -template <typename Index, typename Function, typename Partitioner> -void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { - if (step <= 0 ) - throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument - else if (last > first) { - // Above "else" avoids "potential divide by zero" warning on some platforms - Index end = (last - first - Index(1)) / step + Index(1); - blocked_range<Index> range(static_cast<Index>(0), end); - parallel_for_body<Function, Index> body(f, first, step); - parallel_for(range, body, partitioner); - } -} - -//! Parallel iteration over a range of integers with a step provided and default partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); -} -//! Parallel iteration over a range of integers with a step provided and simple partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { - parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner); -} -//! Parallel iteration over a range of integers with a step provided and auto partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner); -} -//! Parallel iteration over a range of integers with a step provided and static partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { - parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner); -} -//! 
Parallel iteration over a range of integers with a step provided and affinity partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { - parallel_for_impl(first, last, step, f, partitioner); -} - -//! Parallel iteration over a range of integers with a default step value and default partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); -} -//! Parallel iteration over a range of integers with a default step value and simple partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { - parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and auto partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and static partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { - parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and affinity partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { - parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner); -} - -//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner -template <typename Index, typename Function, typename Partitioner> -void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) { - if (step <= 0 ) - throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument - else if (last > first) { - // Above "else" avoids "potential divide by zero" warning on some platforms - Index end = (last - first - Index(1)) / step + Index(1); - blocked_range<Index> range(static_cast<Index>(0), end); - parallel_for_body<Function, Index> body(f, first, step); - parallel_for(range, body, partitioner, context); - } -} - -//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); -} -//! 
Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { - parallel_for_impl(first, last, step, f, partitioner, context); -} - -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); -} -//! 
Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { - parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context); -} -// @} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::parallel_for; -// Split types -using detail::split; -using detail::proportional_split; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_parallel_for_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_for_H +#define __TBB_parallel_for_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" +#include "profiling.h" + +#include "partitioner.h" +#include "blocked_range.h" +#include "task_group.h" + +#include <cstddef> +#include <new> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Task type used in parallel_for +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_for : public task { + Range my_range; + const Body my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + //! Constructor for root task. + start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : + my_range(range), + my_body(body), + my_partition(partitioner), + my_allocator(alloc) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. */ + start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc) {} + //! Construct right child from the given range as response to the demand. + /** parent_ remains left child. Newly constructed object is right child. 
*/ + start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : + my_range(r), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split()), + my_allocator(alloc) + { + my_partition.align_depth( d ); + } + static void run(const Range& range, const Body& body, Partitioner& partitioner) { + task_group_context context(PARALLEL_FOR); + run(range, body, partitioner, context); + } + + static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + small_object_allocator alloc{}; + start_for& for_task = *alloc.new_object<start_for>(range, body, partitioner, alloc); + + // defer creation of the wait node until task allocation succeeds + wait_node wn; + for_task.my_parent = &wn; + execute_and_wait(for_task, context, wn.m_wait, context); + } + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + my_body( r ); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(const Range& r, depth_t d, execution_data& ed) { + offer_work_impl(ed, *this, r, d); + } + +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... constructor_args) { + // New right child + small_object_allocator alloc{}; + start_for& right_child = *alloc.new_object<start_for>(ed, std::forward<Args>(constructor_args)..., alloc); + + // New root node as a continuation and ref count. Left and right child attach to the new parent. + right_child.my_parent = my_parent = alloc.new_object<tree_node>(ed, my_parent, 2, alloc); + // Spawn the right sibling + right_child.spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_for<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and allocator an object destruction + node* parent = my_parent; + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_for(); + // Unwind the tree decrementing the parent`s reference count + + fold_tree<tree_node>(parent, ed); + allocator.deallocate(this, ed); + +} + +//! execute task for parallel_for +template<typename Range, typename Body, typename Partitioner> +task* start_for<Range, Body, Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + my_partition.execute(*this, my_range, ed); + finalize(ed); + return nullptr; +} + +//! cancel task for parallel_for +template<typename Range, typename Body, typename Partitioner> +task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//! 
Calls the function with values from range [begin, end) with a step provided +template<typename Function, typename Index> +class parallel_for_body : detail::no_assign { + const Function &my_func; + const Index my_begin; + const Index my_step; +public: + parallel_for_body( const Function& _func, Index& _begin, Index& _step ) + : my_func(_func), my_begin(_begin), my_step(_step) {} + + void operator()( const blocked_range<Index>& r ) const { + // A set of local variables to help the compiler with vectorization of the following loop. + Index b = r.begin(); + Index e = r.end(); + Index ms = my_step; + Index k = my_begin + b*ms; + +#if __INTEL_COMPILER +#pragma ivdep +#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE +#pragma vector always assert +#endif +#endif + for ( Index i = b; i < e; ++i, k += ms ) { + my_func( k ); + } + } +}; + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_for_body_req Requirements on parallel_for body + Class \c Body implementing the concept of parallel_for body must define: + - \code Body::Body( const Body& ); \endcode Copy constructor + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r. +**/ + +/** \name parallel_for + See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/ +//@{ + +//! Parallel iteration over range with default partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body ) { + start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); +} + +//! Parallel iteration over range with simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { + start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with auto_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { + start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with static_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { + start_for<Range,Body,const static_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with affinity_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { + start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, task_group_context& context ) { + start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); +} + +//! Parallel iteration over range with simple partitioner and user-supplied context. 
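// Editorial note: a minimal usage sketch of the parallel_for overloads declared in this
// header, not part of the diff itself. The scale_all() function, the vector of floats and
// the doubling lambda are illustrative assumptions; only the tbb:: calls come from the API above.
#include "oneapi/tbb/parallel_for.h"
#include "oneapi/tbb/blocked_range.h"
#include "oneapi/tbb/task_group.h"
#include <cstddef>
#include <vector>

void scale_all(std::vector<float>& data) {
    tbb::task_group_context ctx;     // optional user-supplied context, cancellable from outside
    tbb::affinity_partitioner ap;    // mutable partitioner, reusable across calls for cache affinity

    // Range-based form: Body::operator()(const blocked_range<std::size_t>&) const
    tbb::parallel_for(
        tbb::blocked_range<std::size_t>(0, data.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                data[i] *= 2.0f;
        },
        ap, ctx);

    // Compact integer form with an explicit step; uses auto_partitioner by default
    tbb::parallel_for(std::size_t(0), data.size(), std::size_t(2),
                      [&](std::size_t i) { data[i] += 1.0f; });
}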
+/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with auto_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with static_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with affinity_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context); +} + +//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner +template <typename Index, typename Function, typename Partitioner> +void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { + if (step <= 0 ) + throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument + else if (last > first) { + // Above "else" avoids "potential divide by zero" warning on some platforms + Index end = (last - first - Index(1)) / step + Index(1); + blocked_range<Index> range(static_cast<Index>(0), end); + parallel_for_body<Function, Index> body(f, first, step); + parallel_for(range, body, partitioner); + } +} + +//! Parallel iteration over a range of integers with a step provided and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); +} +//! Parallel iteration over a range of integers with a step provided and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner); +} +//! Parallel iteration over a range of integers with a step provided and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner); +} +//! Parallel iteration over a range of integers with a step provided and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner); +} +//! 
Parallel iteration over a range of integers with a step provided and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { + parallel_for_impl(first, last, step, f, partitioner); +} + +//! Parallel iteration over a range of integers with a default step value and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); +} +//! Parallel iteration over a range of integers with a default step value and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { + parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner); +} + +//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner +template <typename Index, typename Function, typename Partitioner> +void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) { + if (step <= 0 ) + throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument + else if (last > first) { + // Above "else" avoids "potential divide by zero" warning on some platforms + Index end = (last - first - Index(1)) / step + Index(1); + blocked_range<Index> range(static_cast<Index>(0), end); + parallel_for_body<Function, Index> body(f, first, step); + parallel_for(range, body, partitioner, context); + } +} + +//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); +} +//! 
Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { + parallel_for_impl(first, last, step, f, partitioner, context); +} + +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! 
Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { + parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context); +} +// @} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_for; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_for_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h index 563e00f5fc..0050b1746c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h @@ -1,644 +1,644 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_for_each_H -#define __TBB_parallel_for_each_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_exception.h" -#include "detail/_task.h" -#include "detail/_aligned_space.h" -#include "detail/_small_object_pool.h" - -#include "parallel_for.h" -#include "task_group.h" // task_group_context - -#include <iterator> -#include <type_traits> - -namespace tbb { -namespace detail { -namespace d2 { -template<typename Body, typename Item> class feeder_impl; -} // namespace d2 - -namespace d1 { -//! Class the user supplied algorithm body uses to add new tasks -template<typename Item> -class feeder { - feeder() {} - feeder(const feeder&) = delete; - void operator=( const feeder&) = delete; - - virtual ~feeder () {} - virtual void internal_add_copy(const Item& item) = 0; - virtual void internal_add_move(Item&& item) = 0; - - template<typename Body_, typename Item_> friend class detail::d2::feeder_impl; -public: - //! Add a work item to a running parallel_for_each. - void add(const Item& item) {internal_add_copy(item);} - void add(Item&& item) {internal_add_move(std::move(item));} -}; - -} // namespace d1 - -namespace d2 { -using namespace tbb::detail::d1; -/** Selects one of the two possible forms of function call member operator. 
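// Editorial note: a minimal sketch, not part of the diff, of the two body forms this
// selector distinguishes: a body taking only the item, and a body that also receives a
// feeder for adding work on the fly. The drain() function and the integer work list are
// illustrative assumptions; tbb::parallel_for_each and tbb::feeder are the APIs shown here.
#include "oneapi/tbb/parallel_for_each.h"
#include <vector>

void drain(std::vector<int>& work) {
    // Plain form: the body receives only the item.
    tbb::parallel_for_each(work.begin(), work.end(),
                           [](int x) { (void)x; /* process x */ });

    // Feeder form: the body may enqueue additional items into the running algorithm.
    tbb::parallel_for_each(work.begin(), work.end(),
                           [](int x, tbb::feeder<int>& f) {
                               if (x > 0)
                                   f.add(x - 1);  // dynamically added work item
                           });
}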
- @ingroup algorithms **/ -template<class Body> -struct parallel_for_each_operator_selector { -public: - template<typename ItemArg, typename FeederArg> - static auto call(const Body& body, ItemArg&& item, FeederArg*) - -> decltype(body(std::forward<ItemArg>(item)), void()) { - #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Suppression of Microsoft non-standard extension warnings - #pragma warning (push) - #pragma warning (disable: 4239) - #endif - - body(std::forward<ItemArg>(item)); - - #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (push) - #endif - } - - template<typename ItemArg, typename FeederArg> - static auto call(const Body& body, ItemArg&& item, FeederArg* feeder) - -> decltype(body(std::forward<ItemArg>(item), *feeder), void()) { - #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Suppression of Microsoft non-standard extension warnings - #pragma warning (push) - #pragma warning (disable: 4239) - #endif - __TBB_ASSERT(feeder, "Feeder was not created but should be"); - - body(std::forward<ItemArg>(item), *feeder); - - #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (push) - #endif - } -}; - -template<typename Body, typename Item> -struct feeder_item_task: public task { - using feeder_type = feeder_impl<Body, Item>; - - template <typename ItemType> - feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : - item(std::forward<ItemType>(input_item)), - my_feeder(feeder), - my_allocator(alloc) - {} - - void finalize(const execution_data& ed) { - my_feeder.my_wait_context.release(); - my_allocator.delete_object(this, ed); - } - - //! Hack for resolve ambiguity between calls to the body with and without moving the stored copy - //! Executing body with moving the copy should have higher priority - using first_priority = int; - using second_priority = double; - - template <typename BodyType, typename ItemType, typename FeederType> - static auto call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, first_priority) - -> decltype(parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder), void()) - { - parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder); - } - - template <typename BodyType, typename ItemType, typename FeederType> - static void call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, second_priority) { - parallel_for_each_operator_selector<Body>::call(call_body, call_item, &call_feeder); - } - - task* execute(execution_data& ed) override { - call(my_feeder.my_body, item, my_feeder, first_priority{}); - finalize(ed); - return nullptr; - } - - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } - - Item item; - feeder_type& my_feeder; - small_object_allocator my_allocator; -}; // class feeder_item_task - -/** Implements new task adding procedure. 
- @ingroup algorithms **/ -template<typename Body, typename Item> -class feeder_impl : public feeder<Item> { - // Avoiding use of copy constructor in a virtual method if the type does not support it - void internal_add_copy_impl(std::true_type, const Item& item) { - using feeder_task = feeder_item_task<Body, Item>; - small_object_allocator alloc; - auto task = alloc.new_object<feeder_task>(item, *this, alloc); - - my_wait_context.reserve(); - spawn(*task, my_execution_context); - } - - void internal_add_copy_impl(std::false_type, const Item&) { - __TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object"); - } - - void internal_add_copy(const Item& item) override { - internal_add_copy_impl(typename std::is_copy_constructible<Item>::type(), item); - } - - void internal_add_move(Item&& item) override { - using feeder_task = feeder_item_task<Body, Item>; - small_object_allocator alloc{}; - auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc); - - my_wait_context.reserve(); - spawn(*task, my_execution_context); - } -public: - feeder_impl(const Body& body, wait_context& w_context, task_group_context &context) - : my_body(body), - my_wait_context(w_context) - , my_execution_context(context) - {} - - const Body& my_body; - wait_context& my_wait_context; - task_group_context& my_execution_context; -}; // class feeder_impl - -/** Execute computation under one element of the range - @ingroup algorithms **/ -template<typename Iterator, typename Body, typename Item> -struct for_each_iteration_task: public task { - using feeder_type = feeder_impl<Body, Item>; - - for_each_iteration_task(Iterator input_item_ptr, const Body& body, feeder_impl<Body, Item>* feeder_ptr, wait_context& wait_context) : - item_ptr(input_item_ptr), my_body(body), my_feeder_ptr(feeder_ptr), parent_wait_context(wait_context) - {} - - void finalize() { - parent_wait_context.release(); - } - - task* execute(execution_data&) override { - parallel_for_each_operator_selector<Body>::call(my_body, *item_ptr, my_feeder_ptr); - finalize(); - return nullptr; - } - - task* cancel(execution_data&) override { - finalize(); - return nullptr; - } - - Iterator item_ptr; - const Body& my_body; - feeder_impl<Body, Item>* my_feeder_ptr; - wait_context& parent_wait_context; -}; // class for_each_iteration_task - -// Helper to get the type of the iterator to the internal sequence of copies -// If the element can be passed to the body as an rvalue - this iterator should be move_iterator -template <typename Body, typename Item, typename = void> -struct input_iteration_task_iterator_helper { - // For input iterators we pass const lvalue reference to the body - // It is prohibited to take non-constant lvalue references for input iterators - using type = const Item*; -}; - -template <typename Body, typename Item> -struct input_iteration_task_iterator_helper<Body, Item, - tbb::detail::void_t<decltype(parallel_for_each_operator_selector<Body>::call(std::declval<const Body&>(), - std::declval<Item&&>(), - std::declval<feeder_impl<Body, Item>*>()))>> -{ - using type = std::move_iterator<Item*>; -}; - -/** Split one block task to several(max_block_size) iteration tasks for input iterators - @ingroup algorithms **/ -template <typename Body, typename Item> -struct input_block_handling_task : public task { - static constexpr size_t max_block_size = 4; - - using feeder_type = feeder_impl<Body, Item>; - using iteration_task_iterator_type = typename input_iteration_task_iterator_helper<Body, 
Item>::type; - using iteration_task = for_each_iteration_task<iteration_task_iterator_type, Body, Item>; - - input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, - const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc) - :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), - my_execution_context(e_context), my_allocator(alloc) - { - auto item_it = block_iteration_space.begin(); - for (auto* it = task_pool.begin(); it != task_pool.end(); ++it) { - new (it) iteration_task(iteration_task_iterator_type(item_it++), body, feeder_ptr, my_wait_context); - } - } - - void finalize(const execution_data& ed) { - my_root_wait_context.release(); - my_allocator.delete_object(this, ed); - } - - task* execute(execution_data& ed) override { - __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); - for (std::size_t counter = 1; counter < my_size; ++counter) { - my_wait_context.reserve(); - spawn(*(task_pool.begin() + counter), my_execution_context); - } - my_wait_context.reserve(); - execute_and_wait(*task_pool.begin(), my_execution_context, - my_wait_context, my_execution_context); - - // deallocate current task after children execution - finalize(ed); - return nullptr; - } - - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } - - ~input_block_handling_task() { - for(std::size_t counter = 0; counter < max_block_size; ++counter) { - (task_pool.begin() + counter)->~iteration_task(); - (block_iteration_space.begin() + counter)->~Item(); - } - } - - aligned_space<Item, max_block_size> block_iteration_space; - aligned_space<iteration_task, max_block_size> task_pool; - std::size_t my_size; - wait_context my_wait_context; - wait_context& my_root_wait_context; - task_group_context& my_execution_context; - small_object_allocator my_allocator; -}; // class input_block_handling_task - -/** Split one block task to several(max_block_size) iteration tasks for forward iterators - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item> -struct forward_block_handling_task : public task { - static constexpr size_t max_block_size = 4; - - using iteration_task = for_each_iteration_task<Iterator, Body, Item>; - - forward_block_handling_task(Iterator first, std::size_t size, - wait_context& w_context, task_group_context& e_context, - const Body& body, feeder_impl<Body, Item>* feeder_ptr, - small_object_allocator& alloc) - : my_size(size), my_wait_context(0), my_root_wait_context(w_context), - my_execution_context(e_context), my_allocator(alloc) - { - auto* task_it = task_pool.begin(); - for (std::size_t i = 0; i < size; i++) { - new (task_it++) iteration_task(first, body, feeder_ptr, my_wait_context); - ++first; - } - } - - void finalize(const execution_data& ed) { - my_root_wait_context.release(); - my_allocator.delete_object(this, ed); - } - - task* execute(execution_data& ed) override { - __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); - for(std::size_t counter = 1; counter < my_size; ++counter) { - my_wait_context.reserve(); - spawn(*(task_pool.begin() + counter), my_execution_context); - } - my_wait_context.reserve(); - execute_and_wait(*task_pool.begin(), my_execution_context, - my_wait_context, my_execution_context); - - // deallocate current task after children execution - finalize(ed); - return nullptr; - } - - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } - - ~forward_block_handling_task() { - for(std::size_t 
counter = 0; counter < my_size; ++counter) { - (task_pool.begin() + counter)->~iteration_task(); - } - } - - aligned_space<iteration_task, max_block_size> task_pool; - std::size_t my_size; - wait_context my_wait_context; - wait_context& my_root_wait_context; - task_group_context& my_execution_context; - small_object_allocator my_allocator; -}; // class forward_block_handling_task - -/** Body for parallel_for algorithm. - * Allows to redirect operations under random access iterators range to the parallel_for algorithm. - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item> -class parallel_for_body_wrapper { - Iterator my_first; - const Body& my_body; - feeder_impl<Body, Item>* my_feeder_ptr; -public: - parallel_for_body_wrapper(Iterator first, const Body& body, feeder_impl<Body, Item>* feeder_ptr) - : my_first(first), my_body(body), my_feeder_ptr(feeder_ptr) {} - - void operator()(tbb::blocked_range<std::size_t> range) const { -#if __INTEL_COMPILER -#pragma ivdep -#endif - for (std::size_t count = range.begin(); count != range.end(); count++) { - parallel_for_each_operator_selector<Body>::call(my_body, *(my_first + count), - my_feeder_ptr); - } - } -}; // class parallel_for_body_wrapper - - -/** Helper for getting iterators tag including inherited custom tags - @ingroup algorithms */ -template<typename It> -using tag = typename std::iterator_traits<It>::iterator_category; - -template<typename It> -using iterator_tag_dispatch = typename - std::conditional< - std::is_base_of<std::random_access_iterator_tag, tag<It>>::value, - std::random_access_iterator_tag, - typename std::conditional< - std::is_base_of<std::forward_iterator_tag, tag<It>>::value, - std::forward_iterator_tag, - std::input_iterator_tag - >::type - >::type; - -template <typename Body, typename Iterator, typename Item> -using feeder_is_required = tbb::detail::void_t<decltype(std::declval<const Body>()(std::declval<typename std::iterator_traits<Iterator>::reference>(), - std::declval<feeder<Item>&>()))>; - -// Creates feeder object only if the body can accept it -template <typename Iterator, typename Body, typename Item, typename = void> -struct feeder_holder { - feeder_holder( wait_context&, task_group_context&, const Body& ) {} - - feeder_impl<Body, Item>* feeder_ptr() { return nullptr; } -}; // class feeder_holder - -template <typename Iterator, typename Body, typename Item> -class feeder_holder<Iterator, Body, Item, feeder_is_required<Body, Iterator, Item>> { -public: - feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) - : my_feeder(body, w_context, context) {} - - feeder_impl<Body, Item>* feeder_ptr() { return &my_feeder; } -private: - feeder_impl<Body, Item> my_feeder; -}; // class feeder_holder - -template <typename Iterator, typename Body, typename Item> -class for_each_root_task_base : public task { -public: - for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) - : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), - my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) - { - my_wait_context.reserve(); - } -private: - task* cancel(execution_data&) override { - this->my_wait_context.release(); - return nullptr; - } -protected: - Iterator my_first; - Iterator my_last; - wait_context& my_wait_context; - task_group_context& my_execution_context; - const Body& my_body; - feeder_holder<Iterator, Body, Item> 
my_feeder_holder; -}; // class for_each_root_task_base - -/** parallel_for_each algorithm root task - most generic version - * Splits input range to blocks - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item, typename IteratorTag = iterator_tag_dispatch<Iterator>> -class for_each_root_task : public for_each_root_task_base<Iterator, Body, Item> -{ - using base_type = for_each_root_task_base<Iterator, Body, Item>; -public: - using base_type::base_type; -private: - task* execute(execution_data& ed) override { - using block_handling_type = input_block_handling_task<Body, Item>; - - if (this->my_first == this->my_last) { - this->my_wait_context.release(); - return nullptr; - } - - this->my_wait_context.reserve(); - small_object_allocator alloc{}; - auto block_handling_task = alloc.new_object<block_handling_type>(ed, this->my_wait_context, this->my_execution_context, - this->my_body, this->my_feeder_holder.feeder_ptr(), - alloc); - - auto* block_iterator = block_handling_task->block_iteration_space.begin(); - for (; !(this->my_first == this->my_last) && block_handling_task->my_size < block_handling_type::max_block_size; ++this->my_first) { - // Move semantics are automatically used when supported by the iterator - new (block_iterator++) Item(*this->my_first); - ++block_handling_task->my_size; - } - - // Do not access this after spawn to avoid races - spawn(*this, this->my_execution_context); - return block_handling_task; - } -}; // class for_each_root_task - most generic implementation - -/** parallel_for_each algorithm root task - forward iterator based specialization - * Splits input range to blocks - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item> -class for_each_root_task<Iterator, Body, Item, std::forward_iterator_tag> - : public for_each_root_task_base<Iterator, Body, Item> -{ - using base_type = for_each_root_task_base<Iterator, Body, Item>; -public: - using base_type::base_type; -private: - task* execute(execution_data& ed) override { - using block_handling_type = forward_block_handling_task<Iterator, Body, Item>; - if (this->my_first == this->my_last) { - this->my_wait_context.release(); - return nullptr; - } - - std::size_t block_size{0}; - Iterator first_block_element = this->my_first; - for (; !(this->my_first == this->my_last) && block_size < block_handling_type::max_block_size; ++this->my_first) { - ++block_size; - } - - this->my_wait_context.reserve(); - small_object_allocator alloc{}; - auto block_handling_task = alloc.new_object<block_handling_type>(ed, first_block_element, block_size, - this->my_wait_context, this->my_execution_context, - this->my_body, this->my_feeder_holder.feeder_ptr(), alloc); - - // Do not access this after spawn to avoid races - spawn(*this, this->my_execution_context); - return block_handling_task; - } -}; // class for_each_root_task - forward iterator based specialization - -/** parallel_for_each algorithm root task - random access iterator based specialization - * Splits input range to blocks - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item> -class for_each_root_task<Iterator, Body, Item, std::random_access_iterator_tag> - : public for_each_root_task_base<Iterator, Body, Item> -{ - using base_type = for_each_root_task_base<Iterator, Body, Item>; -public: - using base_type::base_type; -private: - task* execute(execution_data&) override { - tbb::parallel_for( - tbb::blocked_range<std::size_t>(0, std::distance(this->my_first, this->my_last)), - 
parallel_for_body_wrapper<Iterator, Body, Item>(this->my_first, this->my_body, this->my_feeder_holder.feeder_ptr()) - , this->my_execution_context - ); - - this->my_wait_context.release(); - return nullptr; - } -}; // class for_each_root_task - random access iterator based specialization - -/** Helper for getting item type. If item type can be deduced from feeder - got it from feeder, - if feeder is generic - got item type from range. - @ingroup algorithms */ -template<typename Body, typename Item, typename FeederArg> -auto feeder_argument_parser(void (Body::*)(Item, feeder<FeederArg>&) const) -> FeederArg; - -template<typename Body, typename> -decltype(feeder_argument_parser<Body>(&Body::operator())) get_item_type_impl(int); // for (T, feeder<T>) -template<typename Body, typename Item> Item get_item_type_impl(...); // stub - -template <typename Body, typename Item> -using get_item_type = decltype(get_item_type_impl<Body, Item>(0)); - -/** Implements parallel iteration over a range. - @ingroup algorithms */ -template<typename Iterator, typename Body> -void run_parallel_for_each( Iterator first, Iterator last, const Body& body, task_group_context& context) -{ - if (!(first == last)) { - using ItemType = get_item_type<Body, typename std::iterator_traits<Iterator>::value_type>; - wait_context w_context(0); - - for_each_root_task<Iterator, Body, ItemType> root_task(first, last, body, w_context, context); - - execute_and_wait(root_task, context, w_context, context); - } -} - -/** \page parallel_for_each_body_req Requirements on parallel_for_each body - Class \c Body implementing the concept of parallel_for_each body must define: - - \code - B::operator()( - cv_item_type item, - feeder<item_type>& feeder - ) const - - OR - - B::operator()( cv_item_type& item ) const - \endcode Process item. - May be invoked concurrently for the same \c this but different \c item. - - - \code item_type( const item_type& ) \endcode - Copy a work item. - - \code ~item_type() \endcode Destroy a work item -**/ - -/** \name parallel_for_each - See also requirements on \ref parallel_for_each_body_req "parallel_for_each Body". **/ -//@{ -//! Parallel iteration over a range, with optional addition of more work. -/** @ingroup algorithms */ -template<typename Iterator, typename Body> -void parallel_for_each(Iterator first, Iterator last, const Body& body) { - task_group_context context(PARALLEL_FOR_EACH); - run_parallel_for_each<Iterator, Body>(first, last, body, context); -} - -template<typename Range, typename Body> -void parallel_for_each(Range& rng, const Body& body) { - parallel_for_each(std::begin(rng), std::end(rng), body); -} - -template<typename Range, typename Body> -void parallel_for_each(const Range& rng, const Body& body) { - parallel_for_each(std::begin(rng), std::end(rng), body); -} - -//! 
Parallel iteration over a range, with optional addition of more work and user-supplied context -/** @ingroup algorithms */ -template<typename Iterator, typename Body> -void parallel_for_each(Iterator first, Iterator last, const Body& body, task_group_context& context) { - run_parallel_for_each<Iterator, Body>(first, last, body, context); -} - -template<typename Range, typename Body> -void parallel_for_each(Range& rng, const Body& body, task_group_context& context) { - parallel_for_each(std::begin(rng), std::end(rng), body, context); -} - -template<typename Range, typename Body> -void parallel_for_each(const Range& rng, const Body& body, task_group_context& context) { - parallel_for_each(std::begin(rng), std::end(rng), body, context); -} - -} // namespace d2 -} // namespace detail -//! @endcond -//@} - -inline namespace v1 { -using detail::d2::parallel_for_each; -using detail::d1::feeder; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_parallel_for_each_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_for_each_H +#define __TBB_parallel_for_each_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#include "parallel_for.h" +#include "task_group.h" // task_group_context + +#include <iterator> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d2 { +template<typename Body, typename Item> class feeder_impl; +} // namespace d2 + +namespace d1 { +//! Class the user supplied algorithm body uses to add new tasks +template<typename Item> +class feeder { + feeder() {} + feeder(const feeder&) = delete; + void operator=( const feeder&) = delete; + + virtual ~feeder () {} + virtual void internal_add_copy(const Item& item) = 0; + virtual void internal_add_move(Item&& item) = 0; + + template<typename Body_, typename Item_> friend class detail::d2::feeder_impl; +public: + //! Add a work item to a running parallel_for_each. + void add(const Item& item) {internal_add_copy(item);} + void add(Item&& item) {internal_add_move(std::move(item));} +}; + +} // namespace d1 + +namespace d2 { +using namespace tbb::detail::d1; +/** Selects one of the two possible forms of function call member operator. 
+ @ingroup algorithms **/ +template<class Body> +struct parallel_for_each_operator_selector { +public: + template<typename ItemArg, typename FeederArg> + static auto call(const Body& body, ItemArg&& item, FeederArg*) + -> decltype(body(std::forward<ItemArg>(item)), void()) { + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Suppression of Microsoft non-standard extension warnings + #pragma warning (push) + #pragma warning (disable: 4239) + #endif + + body(std::forward<ItemArg>(item)); + + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (push) + #endif + } + + template<typename ItemArg, typename FeederArg> + static auto call(const Body& body, ItemArg&& item, FeederArg* feeder) + -> decltype(body(std::forward<ItemArg>(item), *feeder), void()) { + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Suppression of Microsoft non-standard extension warnings + #pragma warning (push) + #pragma warning (disable: 4239) + #endif + __TBB_ASSERT(feeder, "Feeder was not created but should be"); + + body(std::forward<ItemArg>(item), *feeder); + + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (push) + #endif + } +}; + +template<typename Body, typename Item> +struct feeder_item_task: public task { + using feeder_type = feeder_impl<Body, Item>; + + template <typename ItemType> + feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : + item(std::forward<ItemType>(input_item)), + my_feeder(feeder), + my_allocator(alloc) + {} + + void finalize(const execution_data& ed) { + my_feeder.my_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + //! Hack for resolve ambiguity between calls to the body with and without moving the stored copy + //! Executing body with moving the copy should have higher priority + using first_priority = int; + using second_priority = double; + + template <typename BodyType, typename ItemType, typename FeederType> + static auto call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, first_priority) + -> decltype(parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder), void()) + { + parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder); + } + + template <typename BodyType, typename ItemType, typename FeederType> + static void call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, second_priority) { + parallel_for_each_operator_selector<Body>::call(call_body, call_item, &call_feeder); + } + + task* execute(execution_data& ed) override { + call(my_feeder.my_body, item, my_feeder, first_priority{}); + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + Item item; + feeder_type& my_feeder; + small_object_allocator my_allocator; +}; // class feeder_item_task + +/** Implements new task adding procedure. 
+ @ingroup algorithms **/ +template<typename Body, typename Item> +class feeder_impl : public feeder<Item> { + // Avoiding use of copy constructor in a virtual method if the type does not support it + void internal_add_copy_impl(std::true_type, const Item& item) { + using feeder_task = feeder_item_task<Body, Item>; + small_object_allocator alloc; + auto task = alloc.new_object<feeder_task>(item, *this, alloc); + + my_wait_context.reserve(); + spawn(*task, my_execution_context); + } + + void internal_add_copy_impl(std::false_type, const Item&) { + __TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object"); + } + + void internal_add_copy(const Item& item) override { + internal_add_copy_impl(typename std::is_copy_constructible<Item>::type(), item); + } + + void internal_add_move(Item&& item) override { + using feeder_task = feeder_item_task<Body, Item>; + small_object_allocator alloc{}; + auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc); + + my_wait_context.reserve(); + spawn(*task, my_execution_context); + } +public: + feeder_impl(const Body& body, wait_context& w_context, task_group_context &context) + : my_body(body), + my_wait_context(w_context) + , my_execution_context(context) + {} + + const Body& my_body; + wait_context& my_wait_context; + task_group_context& my_execution_context; +}; // class feeder_impl + +/** Execute computation under one element of the range + @ingroup algorithms **/ +template<typename Iterator, typename Body, typename Item> +struct for_each_iteration_task: public task { + using feeder_type = feeder_impl<Body, Item>; + + for_each_iteration_task(Iterator input_item_ptr, const Body& body, feeder_impl<Body, Item>* feeder_ptr, wait_context& wait_context) : + item_ptr(input_item_ptr), my_body(body), my_feeder_ptr(feeder_ptr), parent_wait_context(wait_context) + {} + + void finalize() { + parent_wait_context.release(); + } + + task* execute(execution_data&) override { + parallel_for_each_operator_selector<Body>::call(my_body, *item_ptr, my_feeder_ptr); + finalize(); + return nullptr; + } + + task* cancel(execution_data&) override { + finalize(); + return nullptr; + } + + Iterator item_ptr; + const Body& my_body; + feeder_impl<Body, Item>* my_feeder_ptr; + wait_context& parent_wait_context; +}; // class for_each_iteration_task + +// Helper to get the type of the iterator to the internal sequence of copies +// If the element can be passed to the body as an rvalue - this iterator should be move_iterator +template <typename Body, typename Item, typename = void> +struct input_iteration_task_iterator_helper { + // For input iterators we pass const lvalue reference to the body + // It is prohibited to take non-constant lvalue references for input iterators + using type = const Item*; +}; + +template <typename Body, typename Item> +struct input_iteration_task_iterator_helper<Body, Item, + tbb::detail::void_t<decltype(parallel_for_each_operator_selector<Body>::call(std::declval<const Body&>(), + std::declval<Item&&>(), + std::declval<feeder_impl<Body, Item>*>()))>> +{ + using type = std::move_iterator<Item*>; +}; + +/** Split one block task to several(max_block_size) iteration tasks for input iterators + @ingroup algorithms **/ +template <typename Body, typename Item> +struct input_block_handling_task : public task { + static constexpr size_t max_block_size = 4; + + using feeder_type = feeder_impl<Body, Item>; + using iteration_task_iterator_type = typename input_iteration_task_iterator_helper<Body, 
Item>::type; + using iteration_task = for_each_iteration_task<iteration_task_iterator_type, Body, Item>; + + input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, + const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc) + :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), + my_execution_context(e_context), my_allocator(alloc) + { + auto item_it = block_iteration_space.begin(); + for (auto* it = task_pool.begin(); it != task_pool.end(); ++it) { + new (it) iteration_task(iteration_task_iterator_type(item_it++), body, feeder_ptr, my_wait_context); + } + } + + void finalize(const execution_data& ed) { + my_root_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + task* execute(execution_data& ed) override { + __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); + for (std::size_t counter = 1; counter < my_size; ++counter) { + my_wait_context.reserve(); + spawn(*(task_pool.begin() + counter), my_execution_context); + } + my_wait_context.reserve(); + execute_and_wait(*task_pool.begin(), my_execution_context, + my_wait_context, my_execution_context); + + // deallocate current task after children execution + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + ~input_block_handling_task() { + for(std::size_t counter = 0; counter < max_block_size; ++counter) { + (task_pool.begin() + counter)->~iteration_task(); + (block_iteration_space.begin() + counter)->~Item(); + } + } + + aligned_space<Item, max_block_size> block_iteration_space; + aligned_space<iteration_task, max_block_size> task_pool; + std::size_t my_size; + wait_context my_wait_context; + wait_context& my_root_wait_context; + task_group_context& my_execution_context; + small_object_allocator my_allocator; +}; // class input_block_handling_task + +/** Split one block task to several(max_block_size) iteration tasks for forward iterators + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +struct forward_block_handling_task : public task { + static constexpr size_t max_block_size = 4; + + using iteration_task = for_each_iteration_task<Iterator, Body, Item>; + + forward_block_handling_task(Iterator first, std::size_t size, + wait_context& w_context, task_group_context& e_context, + const Body& body, feeder_impl<Body, Item>* feeder_ptr, + small_object_allocator& alloc) + : my_size(size), my_wait_context(0), my_root_wait_context(w_context), + my_execution_context(e_context), my_allocator(alloc) + { + auto* task_it = task_pool.begin(); + for (std::size_t i = 0; i < size; i++) { + new (task_it++) iteration_task(first, body, feeder_ptr, my_wait_context); + ++first; + } + } + + void finalize(const execution_data& ed) { + my_root_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + task* execute(execution_data& ed) override { + __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); + for(std::size_t counter = 1; counter < my_size; ++counter) { + my_wait_context.reserve(); + spawn(*(task_pool.begin() + counter), my_execution_context); + } + my_wait_context.reserve(); + execute_and_wait(*task_pool.begin(), my_execution_context, + my_wait_context, my_execution_context); + + // deallocate current task after children execution + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + ~forward_block_handling_task() { + for(std::size_t 
counter = 0; counter < my_size; ++counter) { + (task_pool.begin() + counter)->~iteration_task(); + } + } + + aligned_space<iteration_task, max_block_size> task_pool; + std::size_t my_size; + wait_context my_wait_context; + wait_context& my_root_wait_context; + task_group_context& my_execution_context; + small_object_allocator my_allocator; +}; // class forward_block_handling_task + +/** Body for parallel_for algorithm. + * Allows to redirect operations under random access iterators range to the parallel_for algorithm. + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class parallel_for_body_wrapper { + Iterator my_first; + const Body& my_body; + feeder_impl<Body, Item>* my_feeder_ptr; +public: + parallel_for_body_wrapper(Iterator first, const Body& body, feeder_impl<Body, Item>* feeder_ptr) + : my_first(first), my_body(body), my_feeder_ptr(feeder_ptr) {} + + void operator()(tbb::blocked_range<std::size_t> range) const { +#if __INTEL_COMPILER +#pragma ivdep +#endif + for (std::size_t count = range.begin(); count != range.end(); count++) { + parallel_for_each_operator_selector<Body>::call(my_body, *(my_first + count), + my_feeder_ptr); + } + } +}; // class parallel_for_body_wrapper + + +/** Helper for getting iterators tag including inherited custom tags + @ingroup algorithms */ +template<typename It> +using tag = typename std::iterator_traits<It>::iterator_category; + +template<typename It> +using iterator_tag_dispatch = typename + std::conditional< + std::is_base_of<std::random_access_iterator_tag, tag<It>>::value, + std::random_access_iterator_tag, + typename std::conditional< + std::is_base_of<std::forward_iterator_tag, tag<It>>::value, + std::forward_iterator_tag, + std::input_iterator_tag + >::type + >::type; + +template <typename Body, typename Iterator, typename Item> +using feeder_is_required = tbb::detail::void_t<decltype(std::declval<const Body>()(std::declval<typename std::iterator_traits<Iterator>::reference>(), + std::declval<feeder<Item>&>()))>; + +// Creates feeder object only if the body can accept it +template <typename Iterator, typename Body, typename Item, typename = void> +struct feeder_holder { + feeder_holder( wait_context&, task_group_context&, const Body& ) {} + + feeder_impl<Body, Item>* feeder_ptr() { return nullptr; } +}; // class feeder_holder + +template <typename Iterator, typename Body, typename Item> +class feeder_holder<Iterator, Body, Item, feeder_is_required<Body, Iterator, Item>> { +public: + feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) + : my_feeder(body, w_context, context) {} + + feeder_impl<Body, Item>* feeder_ptr() { return &my_feeder; } +private: + feeder_impl<Body, Item> my_feeder; +}; // class feeder_holder + +template <typename Iterator, typename Body, typename Item> +class for_each_root_task_base : public task { +public: + for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) + : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), + my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) + { + my_wait_context.reserve(); + } +private: + task* cancel(execution_data&) override { + this->my_wait_context.release(); + return nullptr; + } +protected: + Iterator my_first; + Iterator my_last; + wait_context& my_wait_context; + task_group_context& my_execution_context; + const Body& my_body; + feeder_holder<Iterator, Body, Item> 
my_feeder_holder; +}; // class for_each_root_task_base + +/** parallel_for_each algorithm root task - most generic version + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item, typename IteratorTag = iterator_tag_dispatch<Iterator>> +class for_each_root_task : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data& ed) override { + using block_handling_type = input_block_handling_task<Body, Item>; + + if (this->my_first == this->my_last) { + this->my_wait_context.release(); + return nullptr; + } + + this->my_wait_context.reserve(); + small_object_allocator alloc{}; + auto block_handling_task = alloc.new_object<block_handling_type>(ed, this->my_wait_context, this->my_execution_context, + this->my_body, this->my_feeder_holder.feeder_ptr(), + alloc); + + auto* block_iterator = block_handling_task->block_iteration_space.begin(); + for (; !(this->my_first == this->my_last) && block_handling_task->my_size < block_handling_type::max_block_size; ++this->my_first) { + // Move semantics are automatically used when supported by the iterator + new (block_iterator++) Item(*this->my_first); + ++block_handling_task->my_size; + } + + // Do not access this after spawn to avoid races + spawn(*this, this->my_execution_context); + return block_handling_task; + } +}; // class for_each_root_task - most generic implementation + +/** parallel_for_each algorithm root task - forward iterator based specialization + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class for_each_root_task<Iterator, Body, Item, std::forward_iterator_tag> + : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data& ed) override { + using block_handling_type = forward_block_handling_task<Iterator, Body, Item>; + if (this->my_first == this->my_last) { + this->my_wait_context.release(); + return nullptr; + } + + std::size_t block_size{0}; + Iterator first_block_element = this->my_first; + for (; !(this->my_first == this->my_last) && block_size < block_handling_type::max_block_size; ++this->my_first) { + ++block_size; + } + + this->my_wait_context.reserve(); + small_object_allocator alloc{}; + auto block_handling_task = alloc.new_object<block_handling_type>(ed, first_block_element, block_size, + this->my_wait_context, this->my_execution_context, + this->my_body, this->my_feeder_holder.feeder_ptr(), alloc); + + // Do not access this after spawn to avoid races + spawn(*this, this->my_execution_context); + return block_handling_task; + } +}; // class for_each_root_task - forward iterator based specialization + +/** parallel_for_each algorithm root task - random access iterator based specialization + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class for_each_root_task<Iterator, Body, Item, std::random_access_iterator_tag> + : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data&) override { + tbb::parallel_for( + tbb::blocked_range<std::size_t>(0, std::distance(this->my_first, this->my_last)), + 
parallel_for_body_wrapper<Iterator, Body, Item>(this->my_first, this->my_body, this->my_feeder_holder.feeder_ptr()) + , this->my_execution_context + ); + + this->my_wait_context.release(); + return nullptr; + } +}; // class for_each_root_task - random access iterator based specialization + +/** Helper for getting item type. If item type can be deduced from feeder - got it from feeder, + if feeder is generic - got item type from range. + @ingroup algorithms */ +template<typename Body, typename Item, typename FeederArg> +auto feeder_argument_parser(void (Body::*)(Item, feeder<FeederArg>&) const) -> FeederArg; + +template<typename Body, typename> +decltype(feeder_argument_parser<Body>(&Body::operator())) get_item_type_impl(int); // for (T, feeder<T>) +template<typename Body, typename Item> Item get_item_type_impl(...); // stub + +template <typename Body, typename Item> +using get_item_type = decltype(get_item_type_impl<Body, Item>(0)); + +/** Implements parallel iteration over a range. + @ingroup algorithms */ +template<typename Iterator, typename Body> +void run_parallel_for_each( Iterator first, Iterator last, const Body& body, task_group_context& context) +{ + if (!(first == last)) { + using ItemType = get_item_type<Body, typename std::iterator_traits<Iterator>::value_type>; + wait_context w_context(0); + + for_each_root_task<Iterator, Body, ItemType> root_task(first, last, body, w_context, context); + + execute_and_wait(root_task, context, w_context, context); + } +} + +/** \page parallel_for_each_body_req Requirements on parallel_for_each body + Class \c Body implementing the concept of parallel_for_each body must define: + - \code + B::operator()( + cv_item_type item, + feeder<item_type>& feeder + ) const + + OR + + B::operator()( cv_item_type& item ) const + \endcode Process item. + May be invoked concurrently for the same \c this but different \c item. + + - \code item_type( const item_type& ) \endcode + Copy a work item. + - \code ~item_type() \endcode Destroy a work item +**/ + +/** \name parallel_for_each + See also requirements on \ref parallel_for_each_body_req "parallel_for_each Body". **/ +//@{ +//! Parallel iteration over a range, with optional addition of more work. +/** @ingroup algorithms */ +template<typename Iterator, typename Body> +void parallel_for_each(Iterator first, Iterator last, const Body& body) { + task_group_context context(PARALLEL_FOR_EACH); + run_parallel_for_each<Iterator, Body>(first, last, body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(Range& rng, const Body& body) { + parallel_for_each(std::begin(rng), std::end(rng), body); +} + +template<typename Range, typename Body> +void parallel_for_each(const Range& rng, const Body& body) { + parallel_for_each(std::begin(rng), std::end(rng), body); +} + +//! 
Parallel iteration over a range, with optional addition of more work and user-supplied context +/** @ingroup algorithms */ +template<typename Iterator, typename Body> +void parallel_for_each(Iterator first, Iterator last, const Body& body, task_group_context& context) { + run_parallel_for_each<Iterator, Body>(first, last, body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(Range& rng, const Body& body, task_group_context& context) { + parallel_for_each(std::begin(rng), std::end(rng), body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(const Range& rng, const Body& body, task_group_context& context) { + parallel_for_each(std::begin(rng), std::end(rng), body, context); +} + +} // namespace d2 +} // namespace detail +//! @endcond +//@} + +inline namespace v1 { +using detail::d2::parallel_for_each; +using detail::d1::feeder; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_for_each_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h index 6eb0f2e530..9138270b0c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h @@ -1,227 +1,227 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_invoke_H -#define __TBB_parallel_invoke_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_exception.h" -#include "detail/_task.h" -#include "detail/_template_helpers.h" -#include "detail/_small_object_pool.h" - -#include "task_group.h" - -#include <tuple> -#include <atomic> -#include <utility> - -namespace tbb { -namespace detail { -namespace d1 { - -//! Simple task object, executing user method -template<typename Function, typename WaitObject> -struct function_invoker : public task { - function_invoker(const Function& function, WaitObject& wait_ctx) : - my_function(function), - parent_wait_ctx(wait_ctx) - {} - - task* execute(execution_data& ed) override { - my_function(); - parent_wait_ctx.release(ed); - call_itt_task_notify(destroy, this); - return nullptr; - } - - task* cancel(execution_data& ed) override { - parent_wait_ctx.release(ed); - return nullptr; - } - - const Function& my_function; - WaitObject& parent_wait_ctx; -}; // struct function_invoker - -//! Task object for managing subroots in trinary task trees. -// Endowed with additional synchronization logic (compatible with wait object intefaces) to support -// continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors -// and then executes first functor by itself. But only the last executed functor must destruct and deallocate -// the subroot task. 
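// Illustrative usage sketch (not part of the original header): the public
// tbb::parallel_invoke declared further below accepts two or more callables and
// blocks until all of them have finished; groups of three functors are handed
// to an invoke_subroot_task as described above. The sum_three function and the
// sa/sb/sc variables are invented for this example.
#include "oneapi/tbb/parallel_invoke.h"
#include <numeric>
#include <vector>

inline void sum_three(const std::vector<long>& a, const std::vector<long>& b,
                      const std::vector<long>& c, long& sa, long& sb, long& sc) {
    tbb::parallel_invoke(
        [&] { sa = std::accumulate(a.begin(), a.end(), 0L); },
        [&] { sb = std::accumulate(b.begin(), b.end(), 0L); },
        [&] { sc = std::accumulate(c.begin(), c.end(), 0L); });
    // All three lambdas are guaranteed to have completed here.
}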
-template<typename F1, typename F2, typename F3> -struct invoke_subroot_task : public task { - wait_context& root_wait_ctx; - std::atomic<unsigned> ref_count{0}; - bool child_spawned = false; - - const F1& self_invoked_functor; - function_invoker<F2, invoke_subroot_task<F1, F2, F3>> f2_invoker; - function_invoker<F3, invoke_subroot_task<F1, F2, F3>> f3_invoker; - - task_group_context& my_execution_context; - small_object_allocator my_allocator; - - invoke_subroot_task(const F1& f1, const F2& f2, const F3& f3, wait_context& wait_ctx, task_group_context& context, - small_object_allocator& alloc) : - root_wait_ctx(wait_ctx), - self_invoked_functor(f1), - f2_invoker(f2, *this), - f3_invoker(f3, *this), - my_execution_context(context), - my_allocator(alloc) - { - root_wait_ctx.reserve(); - } - - void finalize(const execution_data& ed) { - root_wait_ctx.release(); - - my_allocator.delete_object(this, ed); - } - - void release(const execution_data& ed) { - __TBB_ASSERT(ref_count > 0, nullptr); - call_itt_task_notify(releasing, this); - if( --ref_count == 0 ) { - call_itt_task_notify(acquired, this); - finalize(ed); - } - } - - task* execute(execution_data& ed) override { - ref_count.fetch_add(3, std::memory_order_relaxed); - spawn(f3_invoker, my_execution_context); - spawn(f2_invoker, my_execution_context); - self_invoked_functor(); - - release(ed); - return nullptr; - } - - task* cancel(execution_data& ed) override { - if( ref_count > 0 ) { // detect children spawn - release(ed); - } else { - finalize(ed); - } - return nullptr; - } -}; // struct subroot_task - -class invoke_root_task { -public: - invoke_root_task(wait_context& wc) : my_wait_context(wc) {} - void release(const execution_data&) { - my_wait_context.release(); - } -private: - wait_context& my_wait_context; -}; - -template<typename F1> -void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1) { - root_wait_ctx.reserve(1); - invoke_root_task root(root_wait_ctx); - function_invoker<F1, invoke_root_task> invoker1(f1, root); - - execute_and_wait(invoker1, context, root_wait_ctx, context); -} - -template<typename F1, typename F2> -void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2) { - root_wait_ctx.reserve(2); - invoke_root_task root(root_wait_ctx); - function_invoker<F1, invoke_root_task> invoker1(f1, root); - function_invoker<F2, invoke_root_task> invoker2(f2, root); - - spawn(invoker1, context); - execute_and_wait(invoker2, context, root_wait_ctx, context); -} - -template<typename F1, typename F2, typename F3> -void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3) { - root_wait_ctx.reserve(3); - invoke_root_task root(root_wait_ctx); - function_invoker<F1, invoke_root_task> invoker1(f1, root); - function_invoker<F2, invoke_root_task> invoker2(f2, root); - function_invoker<F3, invoke_root_task> invoker3(f3, root); - - //TODO: implement sub root for two tasks (measure performance) - spawn(invoker1, context); - spawn(invoker2, context); - execute_and_wait(invoker3, context, root_wait_ctx, context); -} - -template<typename F1, typename F2, typename F3, typename... Fs> -void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, - const F1& f1, const F2& f2, const F3& f3, const Fs&... 
fs) { - small_object_allocator alloc{}; - auto sub_root = alloc.new_object<invoke_subroot_task<F1, F2, F3>>(f1, f2, f3, root_wait_ctx, context, alloc); - spawn(*sub_root, context); - - invoke_recursive_separation(root_wait_ctx, context, fs...); -} - -template<typename... Fs> -void parallel_invoke_impl(task_group_context& context, const Fs&... fs) { - static_assert(sizeof...(Fs) >= 2, "Parallel invoke may be called with at least two callable"); - wait_context root_wait_ctx{0}; - - invoke_recursive_separation(root_wait_ctx, context, fs...); -} - -template<typename F1, typename... Fs> -void parallel_invoke_impl(const F1& f1, const Fs&... fs) { - static_assert(sizeof...(Fs) >= 1, "Parallel invoke may be called with at least two callable"); - task_group_context context(PARALLEL_INVOKE); - wait_context root_wait_ctx{0}; - - invoke_recursive_separation(root_wait_ctx, context, fs..., f1); -} - -//! Passes last argument of variadic pack as first for handling user provided task_group_context -template <typename Tuple, typename... Fs> -struct invoke_helper; - -template <typename... Args, typename T, typename... Fs> -struct invoke_helper<std::tuple<Args...>, T, Fs...> : invoke_helper<std::tuple<Args..., T>, Fs...> {}; - -template <typename... Fs, typename T/*task_group_context or callable*/> -struct invoke_helper<std::tuple<Fs...>, T> { - void operator()(Fs&&... args, T&& t) { - parallel_invoke_impl(std::forward<T>(t), std::forward<Fs>(args)...); - } -}; - -//! Parallel execution of several function objects -// We need to pass parameter pack through forwarding reference, -// since this pack may contain task_group_context that must be passed via lvalue non-const reference -template<typename... Fs> -void parallel_invoke(Fs&&... fs) { - invoke_helper<std::tuple<>, Fs...>()(std::forward<Fs>(fs)...); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::parallel_invoke; -} // namespace v1 - -} // namespace tbb -#endif /* __TBB_parallel_invoke_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_invoke_H +#define __TBB_parallel_invoke_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_template_helpers.h" +#include "detail/_small_object_pool.h" + +#include "task_group.h" + +#include <tuple> +#include <atomic> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +//! 
Simple task object, executing user method +template<typename Function, typename WaitObject> +struct function_invoker : public task { + function_invoker(const Function& function, WaitObject& wait_ctx) : + my_function(function), + parent_wait_ctx(wait_ctx) + {} + + task* execute(execution_data& ed) override { + my_function(); + parent_wait_ctx.release(ed); + call_itt_task_notify(destroy, this); + return nullptr; + } + + task* cancel(execution_data& ed) override { + parent_wait_ctx.release(ed); + return nullptr; + } + + const Function& my_function; + WaitObject& parent_wait_ctx; +}; // struct function_invoker + +//! Task object for managing subroots in trinary task trees. +// Endowed with additional synchronization logic (compatible with wait object intefaces) to support +// continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors +// and then executes first functor by itself. But only the last executed functor must destruct and deallocate +// the subroot task. +template<typename F1, typename F2, typename F3> +struct invoke_subroot_task : public task { + wait_context& root_wait_ctx; + std::atomic<unsigned> ref_count{0}; + bool child_spawned = false; + + const F1& self_invoked_functor; + function_invoker<F2, invoke_subroot_task<F1, F2, F3>> f2_invoker; + function_invoker<F3, invoke_subroot_task<F1, F2, F3>> f3_invoker; + + task_group_context& my_execution_context; + small_object_allocator my_allocator; + + invoke_subroot_task(const F1& f1, const F2& f2, const F3& f3, wait_context& wait_ctx, task_group_context& context, + small_object_allocator& alloc) : + root_wait_ctx(wait_ctx), + self_invoked_functor(f1), + f2_invoker(f2, *this), + f3_invoker(f3, *this), + my_execution_context(context), + my_allocator(alloc) + { + root_wait_ctx.reserve(); + } + + void finalize(const execution_data& ed) { + root_wait_ctx.release(); + + my_allocator.delete_object(this, ed); + } + + void release(const execution_data& ed) { + __TBB_ASSERT(ref_count > 0, nullptr); + call_itt_task_notify(releasing, this); + if( --ref_count == 0 ) { + call_itt_task_notify(acquired, this); + finalize(ed); + } + } + + task* execute(execution_data& ed) override { + ref_count.fetch_add(3, std::memory_order_relaxed); + spawn(f3_invoker, my_execution_context); + spawn(f2_invoker, my_execution_context); + self_invoked_functor(); + + release(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + if( ref_count > 0 ) { // detect children spawn + release(ed); + } else { + finalize(ed); + } + return nullptr; + } +}; // struct subroot_task + +class invoke_root_task { +public: + invoke_root_task(wait_context& wc) : my_wait_context(wc) {} + void release(const execution_data&) { + my_wait_context.release(); + } +private: + wait_context& my_wait_context; +}; + +template<typename F1> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1) { + root_wait_ctx.reserve(1); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + + execute_and_wait(invoker1, context, root_wait_ctx, context); +} + +template<typename F1, typename F2> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2) { + root_wait_ctx.reserve(2); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + function_invoker<F2, invoke_root_task> invoker2(f2, root); + + spawn(invoker1, context); + 
execute_and_wait(invoker2, context, root_wait_ctx, context); +} + +template<typename F1, typename F2, typename F3> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3) { + root_wait_ctx.reserve(3); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + function_invoker<F2, invoke_root_task> invoker2(f2, root); + function_invoker<F3, invoke_root_task> invoker3(f3, root); + + //TODO: implement sub root for two tasks (measure performance) + spawn(invoker1, context); + spawn(invoker2, context); + execute_and_wait(invoker3, context, root_wait_ctx, context); +} + +template<typename F1, typename F2, typename F3, typename... Fs> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, + const F1& f1, const F2& f2, const F3& f3, const Fs&... fs) { + small_object_allocator alloc{}; + auto sub_root = alloc.new_object<invoke_subroot_task<F1, F2, F3>>(f1, f2, f3, root_wait_ctx, context, alloc); + spawn(*sub_root, context); + + invoke_recursive_separation(root_wait_ctx, context, fs...); +} + +template<typename... Fs> +void parallel_invoke_impl(task_group_context& context, const Fs&... fs) { + static_assert(sizeof...(Fs) >= 2, "Parallel invoke may be called with at least two callable"); + wait_context root_wait_ctx{0}; + + invoke_recursive_separation(root_wait_ctx, context, fs...); +} + +template<typename F1, typename... Fs> +void parallel_invoke_impl(const F1& f1, const Fs&... fs) { + static_assert(sizeof...(Fs) >= 1, "Parallel invoke may be called with at least two callable"); + task_group_context context(PARALLEL_INVOKE); + wait_context root_wait_ctx{0}; + + invoke_recursive_separation(root_wait_ctx, context, fs..., f1); +} + +//! Passes last argument of variadic pack as first for handling user provided task_group_context +template <typename Tuple, typename... Fs> +struct invoke_helper; + +template <typename... Args, typename T, typename... Fs> +struct invoke_helper<std::tuple<Args...>, T, Fs...> : invoke_helper<std::tuple<Args..., T>, Fs...> {}; + +template <typename... Fs, typename T/*task_group_context or callable*/> +struct invoke_helper<std::tuple<Fs...>, T> { + void operator()(Fs&&... args, T&& t) { + parallel_invoke_impl(std::forward<T>(t), std::forward<Fs>(args)...); + } +}; + +//! Parallel execution of several function objects +// We need to pass parameter pack through forwarding reference, +// since this pack may contain task_group_context that must be passed via lvalue non-const reference +template<typename... Fs> +void parallel_invoke(Fs&&... fs) { + invoke_helper<std::tuple<>, Fs...>()(std::forward<Fs>(fs)...); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_invoke; +} // namespace v1 + +} // namespace tbb +#endif /* __TBB_parallel_invoke_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h index 87a159c925..c2a21bc798 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h @@ -1,153 +1,153 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_pipeline_H -#define __TBB_parallel_pipeline_H - -#include "detail/_pipeline_filters.h" -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "task_group.h" - -#include <cstddef> -#include <atomic> -#include <type_traits> - -namespace tbb { -namespace detail { - -namespace r1 { -void __TBB_EXPORTED_FUNC parallel_pipeline(task_group_context&, std::size_t, const d1::filter_node&); -} - -namespace d1 { - -enum class filter_mode : unsigned int -{ - //! processes multiple items in parallel and in no particular order - parallel = base_filter::filter_is_out_of_order, - //! processes items one at a time; all such filters process items in the same order - serial_in_order = base_filter::filter_is_serial, - //! processes items one at a time and in no particular order - serial_out_of_order = base_filter::filter_is_serial | base_filter::filter_is_out_of_order -}; -//! Class representing a chain of type-safe pipeline filters -/** @ingroup algorithms */ -template<typename InputType, typename OutputType> -class filter { - filter_node_ptr my_root; - filter( filter_node_ptr root ) : my_root(root) {} - friend void parallel_pipeline( size_t, const filter<void,void>&, task_group_context& ); - template<typename T_, typename U_, typename Body> - friend filter<T_,U_> make_filter( filter_mode, const Body& ); - template<typename T_, typename V_, typename U_> - friend filter<T_,U_> operator&( const filter<T_,V_>&, const filter<V_,U_>& ); -public: - filter() = default; - filter( const filter& rhs ) : my_root(rhs.my_root) {} - filter( filter&& rhs ) : my_root(std::move(rhs.my_root)) {} - - void operator=(const filter& rhs) { - my_root = rhs.my_root; - } - void operator=( filter&& rhs ) { - my_root = std::move(rhs.my_root); - } - - template<typename Body> - filter( filter_mode mode, const Body& body ) : - my_root( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) - filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ) { - } - - filter& operator&=( const filter<OutputType,OutputType>& right ) { - *this = *this & right; - return *this; - } - - void clear() { - // Like operator= with filter() on right side. - my_root = nullptr; - } -}; - -//! Create a filter to participate in parallel_pipeline -/** @ingroup algorithms */ -template<typename InputType, typename OutputType, typename Body> -filter<InputType, OutputType> make_filter( filter_mode mode, const Body& body ) { - return filter_node_ptr( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) - filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ); -} - -//! Create a filter to participate in parallel_pipeline -/** @ingroup algorithms */ -template<typename Body> -filter<filter_input<Body>, filter_output<Body>> make_filter( filter_mode mode, const Body& body ) { - return make_filter<filter_input<Body>, filter_output<Body>>(mode, body); -} - -//! Composition of filters left and right. 
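// Illustrative usage sketch (not part of the original header): a three-stage
// pipeline built from make_filter and chained with the operator& defined just
// below. The 100-item limit, the token budget of 8 and the run_square_pipeline
// name are arbitrary choices for the example.
#include "oneapi/tbb/parallel_pipeline.h"
#include <cstdio>

inline void run_square_pipeline() {
    int next = 0;
    tbb::parallel_pipeline(/*max_number_of_live_tokens*/ 8,
        tbb::make_filter<void, int>(tbb::filter_mode::serial_in_order,
            [&next](tbb::flow_control& fc) -> int {
                if (next >= 100) { fc.stop(); return 0; } // signal end of input
                return next++;                            // emit the next token
            })
        & tbb::make_filter<int, int>(tbb::filter_mode::parallel,
            [](int x) { return x * x; })                  // process tokens concurrently
        & tbb::make_filter<int, void>(tbb::filter_mode::serial_in_order,
            [](int x) { std::printf("%d\n", x); }));      // consume in input order
}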
-/** @ingroup algorithms */ -template<typename T, typename V, typename U> -filter<T,U> operator&( const filter<T,V>& left, const filter<V,U>& right ) { - __TBB_ASSERT(left.my_root,"cannot use default-constructed filter as left argument of '&'"); - __TBB_ASSERT(right.my_root,"cannot use default-constructed filter as right argument of '&'"); - return filter_node_ptr( new (r1::allocate_memory(sizeof(filter_node))) filter_node(left.my_root,right.my_root) ); -} - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template<typename Body> -filter(filter_mode, Body) -->filter<filter_input<Body>, filter_output<Body>>; -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -//! Parallel pipeline over chain of filters with user-supplied context. -/** @ingroup algorithms **/ -inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain, task_group_context& context) { - r1::parallel_pipeline(context, max_number_of_live_tokens, *filter_chain.my_root); -} - -//! Parallel pipeline over chain of filters. -/** @ingroup algorithms **/ -inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain) { - task_group_context context; - parallel_pipeline(max_number_of_live_tokens, filter_chain, context); -} - -//! Parallel pipeline over sequence of filters. -/** @ingroup algorithms **/ -template<typename F1, typename F2, typename... FiltersContext> -void parallel_pipeline(size_t max_number_of_live_tokens, - const F1& filter1, - const F2& filter2, - FiltersContext&&... filters) { - parallel_pipeline(max_number_of_live_tokens, filter1 & filter2, std::forward<FiltersContext>(filters)...); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 -{ -using detail::d1::parallel_pipeline; -using detail::d1::filter; -using detail::d1::make_filter; -using detail::d1::filter_mode; -using detail::d1::flow_control; -} -} // tbb - -#endif /* __TBB_parallel_pipeline_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_pipeline_H +#define __TBB_parallel_pipeline_H + +#include "detail/_pipeline_filters.h" +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "task_group.h" + +#include <cstddef> +#include <atomic> +#include <type_traits> + +namespace tbb { +namespace detail { + +namespace r1 { +void __TBB_EXPORTED_FUNC parallel_pipeline(task_group_context&, std::size_t, const d1::filter_node&); +} + +namespace d1 { + +enum class filter_mode : unsigned int +{ + //! processes multiple items in parallel and in no particular order + parallel = base_filter::filter_is_out_of_order, + //! processes items one at a time; all such filters process items in the same order + serial_in_order = base_filter::filter_is_serial, + //! processes items one at a time and in no particular order + serial_out_of_order = base_filter::filter_is_serial | base_filter::filter_is_out_of_order +}; +//! 
Class representing a chain of type-safe pipeline filters +/** @ingroup algorithms */ +template<typename InputType, typename OutputType> +class filter { + filter_node_ptr my_root; + filter( filter_node_ptr root ) : my_root(root) {} + friend void parallel_pipeline( size_t, const filter<void,void>&, task_group_context& ); + template<typename T_, typename U_, typename Body> + friend filter<T_,U_> make_filter( filter_mode, const Body& ); + template<typename T_, typename V_, typename U_> + friend filter<T_,U_> operator&( const filter<T_,V_>&, const filter<V_,U_>& ); +public: + filter() = default; + filter( const filter& rhs ) : my_root(rhs.my_root) {} + filter( filter&& rhs ) : my_root(std::move(rhs.my_root)) {} + + void operator=(const filter& rhs) { + my_root = rhs.my_root; + } + void operator=( filter&& rhs ) { + my_root = std::move(rhs.my_root); + } + + template<typename Body> + filter( filter_mode mode, const Body& body ) : + my_root( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) + filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ) { + } + + filter& operator&=( const filter<OutputType,OutputType>& right ) { + *this = *this & right; + return *this; + } + + void clear() { + // Like operator= with filter() on right side. + my_root = nullptr; + } +}; + +//! Create a filter to participate in parallel_pipeline +/** @ingroup algorithms */ +template<typename InputType, typename OutputType, typename Body> +filter<InputType, OutputType> make_filter( filter_mode mode, const Body& body ) { + return filter_node_ptr( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) + filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ); +} + +//! Create a filter to participate in parallel_pipeline +/** @ingroup algorithms */ +template<typename Body> +filter<filter_input<Body>, filter_output<Body>> make_filter( filter_mode mode, const Body& body ) { + return make_filter<filter_input<Body>, filter_output<Body>>(mode, body); +} + +//! Composition of filters left and right. +/** @ingroup algorithms */ +template<typename T, typename V, typename U> +filter<T,U> operator&( const filter<T,V>& left, const filter<V,U>& right ) { + __TBB_ASSERT(left.my_root,"cannot use default-constructed filter as left argument of '&'"); + __TBB_ASSERT(right.my_root,"cannot use default-constructed filter as right argument of '&'"); + return filter_node_ptr( new (r1::allocate_memory(sizeof(filter_node))) filter_node(left.my_root,right.my_root) ); +} + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template<typename Body> +filter(filter_mode, Body) +->filter<filter_input<Body>, filter_output<Body>>; +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +//! Parallel pipeline over chain of filters with user-supplied context. +/** @ingroup algorithms **/ +inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain, task_group_context& context) { + r1::parallel_pipeline(context, max_number_of_live_tokens, *filter_chain.my_root); +} + +//! Parallel pipeline over chain of filters. +/** @ingroup algorithms **/ +inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain) { + task_group_context context; + parallel_pipeline(max_number_of_live_tokens, filter_chain, context); +} + +//! Parallel pipeline over sequence of filters. +/** @ingroup algorithms **/ +template<typename F1, typename F2, typename... 
FiltersContext> +void parallel_pipeline(size_t max_number_of_live_tokens, + const F1& filter1, + const F2& filter2, + FiltersContext&&... filters) { + parallel_pipeline(max_number_of_live_tokens, filter1 & filter2, std::forward<FiltersContext>(filters)...); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 +{ +using detail::d1::parallel_pipeline; +using detail::d1::filter; +using detail::d1::make_filter; +using detail::d1::filter_mode; +using detail::d1::flow_control; +} +} // tbb + +#endif /* __TBB_parallel_pipeline_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h index 6db6369d68..e41cc29449 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h @@ -1,689 +1,689 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_reduce_H -#define __TBB_parallel_reduce_H - -#include <new> -#include "detail/_namespace_injection.h" -#include "detail/_task.h" -#include "detail/_aligned_space.h" -#include "detail/_small_object_pool.h" - -#include "task_group.h" // task_group_context -#include "partitioner.h" -#include "profiling.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! Tree node type for parallel_reduce. -/** @ingroup algorithms */ -//TODO: consider folding tree via bypass execution(instead of manual folding) -// for better cancellation and critical tasks handling (performance measurements required). -template<typename Body> -struct reduction_tree_node : public tree_node { - tbb::detail::aligned_space<Body> zombie_space; - Body& left_body; - bool has_right_zombie{false}; - - reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : - tree_node{parent, ref_count, alloc}, - left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ - {} - - void join(task_group_context* context) { - if (has_right_zombie && !context->is_group_execution_cancelled()) - left_body.join(*zombie_space.begin()); - } - - ~reduction_tree_node() { - if( has_right_zombie ) zombie_space.begin()->~Body(); - } -}; - -//! Task type used to split the work of parallel_reduce. -/** @ingroup algorithms */ -template<typename Range, typename Body, typename Partitioner> -struct start_reduce : public task { - Range my_range; - Body* my_body; - node* my_parent; - - typename Partitioner::task_partition_type my_partition; - small_object_allocator my_allocator; - bool is_right_child; - - task* execute(execution_data&) override; - task* cancel(execution_data&) override; - void finalize(const execution_data&); - - using tree_node_type = reduction_tree_node<Body>; - - //! Constructor reduce root task. 
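// The root task stores a pointer to the user's Body. The splitting constructors
// below make the right child share that pointer; a split copy of the Body is
// created lazily in execute(), inside the parent reduction_tree_node's
// zombie_space, only when the left child has not finished yet.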
- start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : - my_range(range), - my_body(&body), - my_partition(partitioner), - my_allocator(alloc), - is_right_child(false) {} - //! Splitting constructor used to generate children. - /** parent_ becomes left child. Newly constructed object is right child. */ - start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : - my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split_obj), - my_allocator(alloc), - is_right_child(true) - { - parent_.is_right_child = false; - } - //! Construct right child from the given range as response to the demand. - /** parent_ remains left child. Newly constructed object is right child. */ - start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : - my_range(r), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split()), - my_allocator(alloc), - is_right_child(true) - { - my_partition.align_depth( d ); - parent_.is_right_child = false; - } - static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { - if ( !range.empty() ) { - wait_node wn; - small_object_allocator alloc{}; - auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc); - reduce_task->my_parent = &wn; - execute_and_wait(*reduce_task, context, wn.m_wait, context); - } - } - static void run(const Range& range, Body& body, Partitioner& partitioner) { - // Bound context prevents exceptions from body to affect nesting or sibling algorithms, - // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. - task_group_context context(PARALLEL_REDUCE); - run(range, body, partitioner, context); - } - //! Run body for range, serves as callback for partitioner - void run_body( Range &r ) { - (*my_body)(r); - } - - //! spawn right task, serves as callback for partitioner - void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { - offer_work_impl(ed, *this, split_obj); - } - //! spawn right task, serves as callback for partitioner - void offer_work(const Range& r, depth_t d, execution_data& ed) { - offer_work_impl(ed, *this, r, d); - } - -private: - template <typename... Args> - void offer_work_impl(execution_data& ed, Args&&... args) { - small_object_allocator alloc{}; - // New right child - auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc); - - // New root node as a continuation and ref count. Left and right child attach to the new parent. - right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc); - - // Spawn the right sibling - right_child->spawn_self(ed); - } - - void spawn_self(execution_data& ed) { - my_partition.spawn_task(*this, *context(ed)); - } -}; - -//! fold the tree and deallocate the task -template<typename Range, typename Body, typename Partitioner> -void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { - // Get the current parent and wait object before an object destruction - node* parent = my_parent; - auto allocator = my_allocator; - // Task execution finished - destroy it - this->~start_reduce(); - // Unwind the tree decrementing the parent`s reference count - fold_tree<tree_node_type>(parent, ed); - allocator.deallocate(this, ed); -} - -//! 
Execute parallel_reduce task -template<typename Range, typename Body, typename Partitioner> -task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { - if (!is_same_affinity(ed)) { - my_partition.note_affinity(execution_slot(ed)); - } - my_partition.check_being_stolen(*this, ed); - - // The acquire barrier synchronizes the data pointed with my_body if the left - // task has already finished. - if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { - tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); - my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); - parent_ptr->has_right_zombie = true; - } - __TBB_ASSERT(my_body != nullptr, "Incorrect body value"); - - my_partition.execute(*this, my_range, ed); - - finalize(ed); - return nullptr; -} - -//! Cancel parallel_reduce task -template<typename Range, typename Body, typename Partitioner> -task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { - finalize(ed); - return nullptr; -} - -//! Tree node type for parallel_deterministic_reduce. -/** @ingroup algorithms */ -template<typename Body> -struct deterministic_reduction_tree_node : public tree_node { - Body right_body; - Body& left_body; - - deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : - tree_node{parent, ref_count, alloc}, - right_body{input_left_body, detail::split()}, - left_body(input_left_body) - {} - - void join(task_group_context* context) { - if (!context->is_group_execution_cancelled()) - left_body.join(right_body); - } -}; - -//! Task type used to split the work of parallel_deterministic_reduce. -/** @ingroup algorithms */ -template<typename Range, typename Body, typename Partitioner> -struct start_deterministic_reduce : public task { - Range my_range; - Body& my_body; - node* my_parent; - - typename Partitioner::task_partition_type my_partition; - small_object_allocator my_allocator; - - task* execute(execution_data&) override; - task* cancel(execution_data&) override; - void finalize(const execution_data&); - - using tree_node_type = deterministic_reduction_tree_node<Body>; - - //! Constructor deterministic_reduce root task. - start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : - my_range(range), - my_body(body), - my_partition(partitioner), - my_allocator(alloc) {} - //! Splitting constructor used to generate children. - /** parent_ becomes left child. Newly constructed object is right child. 
*/ - start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, - small_object_allocator& alloc ) : - my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), - my_body(body), - my_partition(parent_.my_partition, split_obj), - my_allocator(alloc) {} - static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { - if ( !range.empty() ) { - wait_node wn; - small_object_allocator alloc{}; - auto deterministic_reduce_task = - alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc); - deterministic_reduce_task->my_parent = &wn; - execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); - } - } - static void run(const Range& range, Body& body, Partitioner& partitioner) { - // Bound context prevents exceptions from body to affect nesting or sibling algorithms, - // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce - // in the try-block. - task_group_context context(PARALLEL_REDUCE); - run(range, body, partitioner, context); - } - //! Run body for range, serves as callback for partitioner - void run_body( Range &r ) { - my_body( r ); - } - //! Spawn right task, serves as callback for partitioner - void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { - offer_work_impl(ed, *this, split_obj); - } -private: - template <typename... Args> - void offer_work_impl(execution_data& ed, Args&&... args) { - small_object_allocator alloc{}; - // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. - auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc); - - // New right child - auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc); - - right_child->my_parent = my_parent = new_tree_node; - - // Spawn the right sibling - right_child->spawn_self(ed); - } - - void spawn_self(execution_data& ed) { - my_partition.spawn_task(*this, *context(ed)); - } -}; - -//! Fold the tree and deallocate the task -template<typename Range, typename Body, typename Partitioner> -void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { - // Get the current parent and wait object before an object destruction - node* parent = my_parent; - - auto allocator = my_allocator; - // Task execution finished - destroy it - this->~start_deterministic_reduce(); - // Unwind the tree decrementing the parent`s reference count - fold_tree<tree_node_type>(parent, ed); - allocator.deallocate(this, ed); -} - -//! Execute parallel_deterministic_reduce task -template<typename Range, typename Body, typename Partitioner> -task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { - if (!is_same_affinity(ed)) { - my_partition.note_affinity(execution_slot(ed)); - } - my_partition.check_being_stolen(*this, ed); - - my_partition.execute(*this, my_range, ed); - - finalize(ed); - return NULL; -} - -//! Cancel parallel_deterministic_reduce task -template<typename Range, typename Body, typename Partitioner> -task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { - finalize(ed); - return NULL; -} - - -//! Auxiliary class for parallel_reduce; for internal use only. 
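// Illustrative usage sketch (not part of the original header): the functional
// form of parallel_reduce that lambda_reduce_body below adapts. The vector
// argument, the 0.0 identity and the plain sum are arbitrary choices; the
// parallel_sum name is invented for this example.
#include "oneapi/tbb/blocked_range.h"
#include "oneapi/tbb/parallel_reduce.h"
#include <cstddef>
#include <vector>

inline double parallel_sum(const std::vector<double>& v) {
    return tbb::parallel_reduce(
        tbb::blocked_range<std::size_t>(0, v.size()),
        0.0,                                               // identity element
        [&](const tbb::blocked_range<std::size_t>& r, double acc) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) acc += v[i];
            return acc;                                    // RealBody: reduce one subrange
        },
        [](double x, double y) { return x + y; });         // Reduction: join two partial sums
}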
-/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" - using given \ref parallel_reduce_lambda_req "anonymous function objects". - **/ -/** @ingroup algorithms */ -template<typename Range, typename Value, typename RealBody, typename Reduction> -class lambda_reduce_body { -//TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced -// (might require some performance measurements) - - const Value& my_identity_element; - const RealBody& my_real_body; - const Reduction& my_reduction; - Value my_value; - lambda_reduce_body& operator= ( const lambda_reduce_body& other ); -public: - lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) - : my_identity_element(identity) - , my_real_body(body) - , my_reduction(reduction) - , my_value(identity) - { } - lambda_reduce_body( const lambda_reduce_body& other ) = default; - lambda_reduce_body( lambda_reduce_body& other, tbb::split ) - : my_identity_element(other.my_identity_element) - , my_real_body(other.my_real_body) - , my_reduction(other.my_reduction) - , my_value(other.my_identity_element) - { } - void operator()(Range& range) { - my_value = my_real_body(range, const_cast<const Value&>(my_value)); - } - void join( lambda_reduce_body& rhs ) { - my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); - } - Value result() const { - return my_value; - } -}; - - -// Requirements on Range concept are documented in blocked_range.h - -/** \page parallel_reduce_body_req Requirements on parallel_reduce body - Class \c Body implementing the concept of parallel_reduce body must define: - - \code Body::Body( Body&, split ); \endcode Splitting constructor. - Must be able to run concurrently with operator() and method \c join - - \code Body::~Body(); \endcode Destructor - - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r - and accumulating the result - - \code void Body::join( Body& b ); \endcode Join results. - The result in \c b should be merged into the result of \c this -**/ - -/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) - TO BE DOCUMENTED -**/ - -/** \name parallel_reduce - See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ -//@{ - -//! Parallel iteration with reduction and default partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body ) { - start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() ); -} - -//! Parallel iteration with reduction and simple_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { - start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner ); -} - -//! Parallel iteration with reduction and auto_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { - start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner ); -} - -//! 
Parallel iteration with reduction and static_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { - start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner ); -} - -//! Parallel iteration with reduction and affinity_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { - start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner ); -} - -//! Parallel iteration with reduction, default partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { - start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); -} - -//! Parallel iteration with reduction, simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { - start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, auto_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { - start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, static_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { - start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, affinity_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { - start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context ); -} -/** parallel_reduce overloads that work with anonymous function objects - (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ - -//! Parallel iteration with reduction and default partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> - ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); - return body.result(); -} - -//! Parallel iteration with reduction and simple_partitioner. 
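// Illustrative usage sketch (not part of the original header): a hand-written
// Body for the imperative overloads above, following the splitting-constructor
// and join() requirements from \ref parallel_reduce_body_req. The sum_body and
// parallel_sum_with_body names are invented for this example.
#include "oneapi/tbb/blocked_range.h"
#include "oneapi/tbb/parallel_reduce.h"
#include <cstddef>
#include <vector>

struct sum_body {
    const std::vector<double>& values;
    double sum = 0.0;

    explicit sum_body(const std::vector<double>& v) : values(v) {}
    sum_body(sum_body& other, tbb::split) : values(other.values) {}  // splitting constructor

    void operator()(const tbb::blocked_range<std::size_t>& r) {
        for (std::size_t i = r.begin(); i != r.end(); ++i) sum += values[i];
    }
    void join(sum_body& rhs) { sum += rhs.sum; }  // merge the right child's partial result
};

inline double parallel_sum_with_body(const std::vector<double>& v) {
    sum_body body(v);
    tbb::parallel_reduce(tbb::blocked_range<std::size_t>(0, v.size()), body);
    return body.sum;
}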
-/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const simple_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> - ::run(range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction and auto_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const auto_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> - ::run( range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction and static_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const static_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> - ::run( range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction and affinity_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - affinity_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> - ::run( range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction, default partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> - ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); - return body.result(); -} - -//! Parallel iteration with reduction, simple partitioner and user-supplied context. 
-/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const simple_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> - ::run( range, body, partitioner, context ); - return body.result(); -} - -//! Parallel iteration with reduction, auto_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const auto_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> - ::run( range, body, partitioner, context ); - return body.result(); -} - -//! Parallel iteration with reduction, static_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const static_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> - ::run( range, body, partitioner, context ); - return body.result(); -} - -//! Parallel iteration with reduction, affinity_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - affinity_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> - ::run( range, body, partitioner, context ); - return body.result(); -} - -//! Parallel iteration with deterministic reduction and default simple partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body ) { - start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner()); -} - -//! Parallel iteration with deterministic reduction and simple partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { - start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner); -} - -//! Parallel iteration with deterministic reduction and static partitioner. 
-/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { - start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); -} - -//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { - start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); -} - -//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { - start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); -} - -//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { - start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); -} - -/** parallel_reduce overloads that work with anonymous function objects - (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ - -//! Parallel iteration with deterministic reduction and default simple partitioner. -// TODO: consider making static_partitioner the default -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { - return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); -} - -//! Parallel iteration with deterministic reduction and simple partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> - ::run(range, body, partitioner); - return body.result(); -} - -//! Parallel iteration with deterministic reduction and static partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { - lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); - start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> - ::run(range, body, partitioner); - return body.result(); -} - -//! 
Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - task_group_context& context ) { - return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); -} - -//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const simple_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); - start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> - ::run(range, body, partitioner, context); - return body.result(); -} - -//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const static_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); - start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> - ::run(range, body, partitioner, context); - return body.result(); -} -//@} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::parallel_reduce; -using detail::d1::parallel_deterministic_reduce; -// Split types -using detail::split; -using detail::proportional_split; -} // namespace v1 - -} // namespace tbb -#endif /* __TBB_parallel_reduce_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_reduce_H +#define __TBB_parallel_reduce_H + +#include <new> +#include "detail/_namespace_injection.h" +#include "detail/_task.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#include "task_group.h" // task_group_context +#include "partitioner.h" +#include "profiling.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! Tree node type for parallel_reduce. +/** @ingroup algorithms */ +//TODO: consider folding tree via bypass execution(instead of manual folding) +// for better cancellation and critical tasks handling (performance measurements required). 
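The reduction_tree_node and start_reduce machinery below implements the splitting/join protocol that user code drives through the classic Body form of parallel_reduce: a Body is split-constructed for a stolen right subrange (the "right zombie" placed in zombie_space) and its partial result is later folded back via join(). A minimal Body written against that protocol might look like the following sketch (illustrative only; SumBody and the data are placeholders, not part of this commit):

#include <cstddef>
#include <vector>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>

struct SumBody {
    const std::vector<float>& data;
    float total = 0.f;

    explicit SumBody(const std::vector<float>& d) : data(d) {}
    // Splitting constructor: may run concurrently with operator() and join().
    SumBody(SumBody& other, tbb::split) : data(other.data) {}

    // Accumulate one subrange into this body's partial result.
    void operator()(const tbb::blocked_range<std::size_t>& r) {
        for (std::size_t i = r.begin(); i != r.end(); ++i) total += data[i];
    }
    // Fold the right child's partial result back into this (left) body.
    void join(SumBody& rhs) { total += rhs.total; }
};

// Usage:
//   SumBody body(values);
//   tbb::parallel_reduce(tbb::blocked_range<std::size_t>(0, values.size()), body);
//   float sum = body.total;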
+template<typename Body> +struct reduction_tree_node : public tree_node { + tbb::detail::aligned_space<Body> zombie_space; + Body& left_body; + bool has_right_zombie{false}; + + reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : + tree_node{parent, ref_count, alloc}, + left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ + {} + + void join(task_group_context* context) { + if (has_right_zombie && !context->is_group_execution_cancelled()) + left_body.join(*zombie_space.begin()); + } + + ~reduction_tree_node() { + if( has_right_zombie ) zombie_space.begin()->~Body(); + } +}; + +//! Task type used to split the work of parallel_reduce. +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_reduce : public task { + Range my_range; + Body* my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + bool is_right_child; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + using tree_node_type = reduction_tree_node<Body>; + + //! Constructor reduce root task. + start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : + my_range(range), + my_body(&body), + my_partition(partitioner), + my_allocator(alloc), + is_right_child(false) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. */ + start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc), + is_right_child(true) + { + parent_.is_right_child = false; + } + //! Construct right child from the given range as response to the demand. + /** parent_ remains left child. Newly constructed object is right child. */ + start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : + my_range(r), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split()), + my_allocator(alloc), + is_right_child(true) + { + my_partition.align_depth( d ); + parent_.is_right_child = false; + } + static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + wait_node wn; + small_object_allocator alloc{}; + auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc); + reduce_task->my_parent = &wn; + execute_and_wait(*reduce_task, context, wn.m_wait, context); + } + } + static void run(const Range& range, Body& body, Partitioner& partitioner) { + // Bound context prevents exceptions from body to affect nesting or sibling algorithms, + // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. + task_group_context context(PARALLEL_REDUCE); + run(range, body, partitioner, context); + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + (*my_body)(r); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } + //! 
spawn right task, serves as callback for partitioner + void offer_work(const Range& r, depth_t d, execution_data& ed) { + offer_work_impl(ed, *this, r, d); + } + +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... args) { + small_object_allocator alloc{}; + // New right child + auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc); + + // New root node as a continuation and ref count. Left and right child attach to the new parent. + right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc); + + // Spawn the right sibling + right_child->spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and wait object before an object destruction + node* parent = my_parent; + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_reduce(); + // Unwind the tree decrementing the parent`s reference count + fold_tree<tree_node_type>(parent, ed); + allocator.deallocate(this, ed); +} + +//! Execute parallel_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + + // The acquire barrier synchronizes the data pointed with my_body if the left + // task has already finished. + if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { + tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); + my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); + parent_ptr->has_right_zombie = true; + } + __TBB_ASSERT(my_body != nullptr, "Incorrect body value"); + + my_partition.execute(*this, my_range, ed); + + finalize(ed); + return nullptr; +} + +//! Cancel parallel_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//! Tree node type for parallel_deterministic_reduce. +/** @ingroup algorithms */ +template<typename Body> +struct deterministic_reduction_tree_node : public tree_node { + Body right_body; + Body& left_body; + + deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : + tree_node{parent, ref_count, alloc}, + right_body{input_left_body, detail::split()}, + left_body(input_left_body) + {} + + void join(task_group_context* context) { + if (!context->is_group_execution_cancelled()) + left_body.join(right_body); + } +}; + +//! Task type used to split the work of parallel_deterministic_reduce. +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_deterministic_reduce : public task { + Range my_range; + Body& my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + using tree_node_type = deterministic_reduction_tree_node<Body>; + + //! 
Constructor deterministic_reduce root task. + start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : + my_range(range), + my_body(body), + my_partition(partitioner), + my_allocator(alloc) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. */ + start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, + small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc) {} + static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + wait_node wn; + small_object_allocator alloc{}; + auto deterministic_reduce_task = + alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc); + deterministic_reduce_task->my_parent = &wn; + execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); + } + } + static void run(const Range& range, Body& body, Partitioner& partitioner) { + // Bound context prevents exceptions from body to affect nesting or sibling algorithms, + // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce + // in the try-block. + task_group_context context(PARALLEL_REDUCE); + run(range, body, partitioner, context); + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + my_body( r ); + } + //! Spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... args) { + small_object_allocator alloc{}; + // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. + auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc); + + // New right child + auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc); + + right_child->my_parent = my_parent = new_tree_node; + + // Spawn the right sibling + right_child->spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! Fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and wait object before an object destruction + node* parent = my_parent; + + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_deterministic_reduce(); + // Unwind the tree decrementing the parent`s reference count + fold_tree<tree_node_type>(parent, ed); + allocator.deallocate(this, ed); +} + +//! Execute parallel_deterministic_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + + my_partition.execute(*this, my_range, ed); + + finalize(ed); + return NULL; +} + +//! 
Cancel parallel_deterministic_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return NULL; +} + + +//! Auxiliary class for parallel_reduce; for internal use only. +/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" + using given \ref parallel_reduce_lambda_req "anonymous function objects". + **/ +/** @ingroup algorithms */ +template<typename Range, typename Value, typename RealBody, typename Reduction> +class lambda_reduce_body { +//TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced +// (might require some performance measurements) + + const Value& my_identity_element; + const RealBody& my_real_body; + const Reduction& my_reduction; + Value my_value; + lambda_reduce_body& operator= ( const lambda_reduce_body& other ); +public: + lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) + : my_identity_element(identity) + , my_real_body(body) + , my_reduction(reduction) + , my_value(identity) + { } + lambda_reduce_body( const lambda_reduce_body& other ) = default; + lambda_reduce_body( lambda_reduce_body& other, tbb::split ) + : my_identity_element(other.my_identity_element) + , my_real_body(other.my_real_body) + , my_reduction(other.my_reduction) + , my_value(other.my_identity_element) + { } + void operator()(Range& range) { + my_value = my_real_body(range, const_cast<const Value&>(my_value)); + } + void join( lambda_reduce_body& rhs ) { + my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); + } + Value result() const { + return my_value; + } +}; + + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_reduce_body_req Requirements on parallel_reduce body + Class \c Body implementing the concept of parallel_reduce body must define: + - \code Body::Body( Body&, split ); \endcode Splitting constructor. + Must be able to run concurrently with operator() and method \c join + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r + and accumulating the result + - \code void Body::join( Body& b ); \endcode Join results. + The result in \c b should be merged into the result of \c this +**/ + +/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) + TO BE DOCUMENTED +**/ + +/** \name parallel_reduce + See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ +//@{ + +//! Parallel iteration with reduction and default partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body ) { + start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() ); +} + +//! Parallel iteration with reduction and simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner ); +} + +//! 
Parallel iteration with reduction and auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { + start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction and static_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { + start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction and affinity_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { + start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction, default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { + start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); +} + +//! Parallel iteration with reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, auto_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, static_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, affinity_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context ); +} +/** parallel_reduce overloads that work with anonymous function objects + (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ + +//! Parallel iteration with reduction and default partitioner. 
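The overloads below are the functional (lambda) form: instead of a user-written Body class they take an identity value, a RealBody that folds one subrange into a running value, and a Reduction that combines two partial results; lambda_reduce_body adapts them to the Body protocol above. A usage sketch (function name and bounds are illustrative, not part of this commit):

#include <cstddef>
#include <vector>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>

double sum(const std::vector<double>& v) {
    return tbb::parallel_reduce(
        tbb::blocked_range<std::size_t>(0, v.size()),
        0.0,                                                   // identity
        [&](const tbb::blocked_range<std::size_t>& r, double running) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) running += v[i];
            return running;                                    // RealBody: fold a subrange
        },
        [](double x, double y) { return x + y; });             // Reduction: join partial results
}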
+/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> + ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); + return body.result(); +} + +//! Parallel iteration with reduction and simple_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> + ::run(range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const auto_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and static_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and affinity_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + affinity_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction, default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> + ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); + return body.result(); +} + +//! 
Parallel iteration with reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, auto_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const auto_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, static_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, affinity_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + affinity_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with deterministic reduction and default simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner()); +} + +//! Parallel iteration with deterministic reduction and simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner); +} + +//! Parallel iteration with deterministic reduction and static partitioner. 
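parallel_deterministic_reduce accepts only simple_partitioner or static_partitioner, so the split/join tree, and with it the association order of a non-associative operation such as floating-point addition, is the same on every run regardless of thread count. A sketch of the functional form (grain size and names are illustrative, not part of this commit):

#include <cstddef>
#include <vector>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>

double reproducible_sum(const std::vector<double>& v) {
    return tbb::parallel_deterministic_reduce(
        tbb::blocked_range<std::size_t>(0, v.size(), /*grainsize=*/1024),
        0.0,
        [&](const tbb::blocked_range<std::size_t>& r, double running) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) running += v[i];
            return running;
        },
        [](double x, double y) { return x + y; });  // same combining order on every run
}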
+/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { + start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); +} + +//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { + start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); +} + +//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); +} + +/** parallel_reduce overloads that work with anonymous function objects + (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ + +//! Parallel iteration with deterministic reduction and default simple partitioner. +// TODO: consider making static_partitioner the default +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { + return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); +} + +//! Parallel iteration with deterministic reduction and simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> + ::run(range, body, partitioner); + return body.result(); +} + +//! Parallel iteration with deterministic reduction and static partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> + ::run(range, body, partitioner); + return body.result(); +} + +//! 
Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + task_group_context& context ) { + return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); +} + +//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> + ::run(range, body, partitioner, context); + return body.result(); +} + +//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> + ::run(range, body, partitioner, context); + return body.result(); +} +//@} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_reduce; +using detail::d1::parallel_deterministic_reduce; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb +#endif /* __TBB_parallel_reduce_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h index d5d69ca0b2..45bf6a2352 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h @@ -1,590 +1,590 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_scan_H -#define __TBB_parallel_scan_H - -#include <functional> - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_exception.h" -#include "detail/_task.h" - -#include "profiling.h" -#include "partitioner.h" -#include "blocked_range.h" -#include "task_group.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! Used to indicate that the initial scan is being performed. 
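parallel_scan, whose header follows, computes a parallel prefix in up to two passes over each subrange: a pre_scan_tag pass that only accumulates a partial sum, and a final_scan_tag pass that replays the subrange with the correct incoming prefix and produces output. A usage sketch of the functional overload (container names are illustrative, not part of this commit):

#include <cstddef>
#include <vector>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_scan.h>

// Inclusive prefix sums of `in`, written to `out` (must already have in.size() elements).
void prefix_sums(const std::vector<int>& in, std::vector<int>& out) {
    tbb::parallel_scan(
        tbb::blocked_range<std::size_t>(0, in.size()),
        0,                                                       // identity
        [&](const tbb::blocked_range<std::size_t>& r, int sum, bool is_final_scan) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) {
                sum += in[i];
                if (is_final_scan) out[i] = sum;                 // write only on the final pass
            }
            return sum;
        },
        [](int left, int right) { return left + right; });       // reverse_join of partial sums
}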
-/** @ingroup algorithms */ -struct pre_scan_tag { - static bool is_final_scan() {return false;} - operator bool() {return is_final_scan();} -}; - -//! Used to indicate that the final scan is being performed. -/** @ingroup algorithms */ -struct final_scan_tag { - static bool is_final_scan() {return true;} - operator bool() {return is_final_scan();} -}; - -template<typename Range, typename Body> -struct sum_node; - -//! Performs final scan for a leaf -/** @ingroup algorithms */ -template<typename Range, typename Body> -struct final_sum : public task { -private: - using sum_node_type = sum_node<Range, Body>; - Body m_body; - aligned_space<Range> m_range; - //! Where to put result of last subrange, or nullptr if not last subrange. - Body* m_stuff_last; - - wait_context& m_wait_context; - sum_node_type* m_parent = nullptr; -public: - small_object_allocator m_allocator; - final_sum( Body& body, wait_context& w_o, small_object_allocator& alloc ) : - m_body(body, split()), m_wait_context(w_o), m_allocator(alloc) { - poison_pointer(m_stuff_last); - } - - final_sum( final_sum& sum, small_object_allocator& alloc ) : - m_body(sum.m_body, split()), m_wait_context(sum.m_wait_context), m_allocator(alloc) { - poison_pointer(m_stuff_last); - } - - ~final_sum() { - m_range.begin()->~Range(); - } - void finish_construction( sum_node_type* parent, const Range& range, Body* stuff_last ) { - __TBB_ASSERT( m_parent == nullptr, nullptr ); - m_parent = parent; - new( m_range.begin() ) Range(range); - m_stuff_last = stuff_last; - } -private: - sum_node_type* release_parent() { - call_itt_task_notify(releasing, m_parent); - if (m_parent) { - auto parent = m_parent; - m_parent = nullptr; - if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { - return parent; - } - } - else - m_wait_context.release(); - return nullptr; - } - sum_node_type* finalize(const execution_data& ed){ - sum_node_type* next_task = release_parent(); - m_allocator.delete_object<final_sum>(this, ed); - return next_task; - } - -public: - task* execute(execution_data& ed) override { - m_body( *m_range.begin(), final_scan_tag() ); - if( m_stuff_last ) - m_stuff_last->assign(m_body); - - return finalize(ed); - } - task* cancel(execution_data& ed) override { - return finalize(ed); - } - template<typename Tag> - void operator()( const Range& r, Tag tag ) { - m_body( r, tag ); - } - void reverse_join( final_sum& a ) { - m_body.reverse_join(a.m_body); - } - void reverse_join( Body& body ) { - m_body.reverse_join(body); - } - void assign_to( Body& body ) { - body.assign(m_body); - } - void self_destroy(const execution_data& ed) { - m_allocator.delete_object<final_sum>(this, ed); - } -}; - -//! Split work to be done in the scan. 
-/** @ingroup algorithms */ -template<typename Range, typename Body> -struct sum_node : public task { -private: - using final_sum_type = final_sum<Range,Body>; -public: - final_sum_type *m_incoming; - final_sum_type *m_body; - Body *m_stuff_last; -private: - final_sum_type *m_left_sum; - sum_node *m_left; - sum_node *m_right; - bool m_left_is_final; - Range m_range; - wait_context& m_wait_context; - sum_node* m_parent; - small_object_allocator m_allocator; -public: - std::atomic<unsigned int> ref_count{0}; - sum_node( const Range range, bool left_is_final_, sum_node* parent, wait_context& w_o, small_object_allocator& alloc ) : - m_stuff_last(nullptr), - m_left_sum(nullptr), - m_left(nullptr), - m_right(nullptr), - m_left_is_final(left_is_final_), - m_range(range), - m_wait_context(w_o), - m_parent(parent), - m_allocator(alloc) - { - if( m_parent ) - m_parent->ref_count.fetch_add(1, std::memory_order_relaxed); - // Poison fields that will be set by second pass. - poison_pointer(m_body); - poison_pointer(m_incoming); - } - - ~sum_node() { - if (m_parent) - m_parent->ref_count.fetch_sub(1, std::memory_order_relaxed); - } -private: - sum_node* release_parent() { - call_itt_task_notify(releasing, m_parent); - if (m_parent) { - auto parent = m_parent; - m_parent = nullptr; - if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { - return parent; - } - } - else - m_wait_context.release(); - return nullptr; - } - task* create_child( const Range& range, final_sum_type& body, sum_node* child, final_sum_type* incoming, Body* stuff_last ) { - if( child ) { - __TBB_ASSERT( is_poisoned(child->m_body) && is_poisoned(child->m_incoming), nullptr ); - child->prepare_for_execution(body, incoming, stuff_last); - return child; - } else { - body.finish_construction(this, range, stuff_last); - return &body; - } - } - - sum_node* finalize(const execution_data& ed) { - sum_node* next_task = release_parent(); - m_allocator.delete_object<sum_node>(this, ed); - return next_task; - } - -public: - void prepare_for_execution(final_sum_type& body, final_sum_type* incoming, Body *stuff_last) { - this->m_body = &body; - this->m_incoming = incoming; - this->m_stuff_last = stuff_last; - } - task* execute(execution_data& ed) override { - if( m_body ) { - if( m_incoming ) - m_left_sum->reverse_join( *m_incoming ); - task* right_child = this->create_child(Range(m_range,split()), *m_left_sum, m_right, m_left_sum, m_stuff_last); - task* left_child = m_left_is_final ? nullptr : this->create_child(m_range, *m_body, m_left, m_incoming, nullptr); - ref_count = (left_child != nullptr) + (right_child != nullptr); - m_body = nullptr; - if( left_child ) { - spawn(*right_child, *ed.context); - return left_child; - } else { - return right_child; - } - } else { - return finalize(ed); - } - } - task* cancel(execution_data& ed) override { - return finalize(ed); - } - void self_destroy(const execution_data& ed) { - m_allocator.delete_object<sum_node>(this, ed); - } - template<typename range,typename body,typename partitioner> - friend struct start_scan; - - template<typename range,typename body> - friend struct finish_scan; -}; - -//! 
Combine partial results -/** @ingroup algorithms */ -template<typename Range, typename Body> -struct finish_scan : public task { -private: - using sum_node_type = sum_node<Range,Body>; - using final_sum_type = final_sum<Range,Body>; - final_sum_type** const m_sum_slot; - sum_node_type*& m_return_slot; - small_object_allocator m_allocator; -public: - final_sum_type* m_right_zombie; - sum_node_type& m_result; - std::atomic<unsigned int> ref_count{2}; - finish_scan* m_parent; - wait_context& m_wait_context; - task* execute(execution_data& ed) override { - __TBB_ASSERT( m_result.ref_count.load() == static_cast<unsigned int>((m_result.m_left!=nullptr)+(m_result.m_right!=nullptr)), nullptr ); - if( m_result.m_left ) - m_result.m_left_is_final = false; - if( m_right_zombie && m_sum_slot ) - (*m_sum_slot)->reverse_join(*m_result.m_left_sum); - __TBB_ASSERT( !m_return_slot, nullptr ); - if( m_right_zombie || m_result.m_right ) { - m_return_slot = &m_result; - } else { - m_result.self_destroy(ed); - } - if( m_right_zombie && !m_sum_slot && !m_result.m_right ) { - m_right_zombie->self_destroy(ed); - m_right_zombie = nullptr; - } - return finalize(ed); - } - task* cancel(execution_data& ed) override { - return finalize(ed); - } - finish_scan(sum_node_type*& return_slot, final_sum_type** sum, sum_node_type& result_, finish_scan* parent, wait_context& w_o, small_object_allocator& alloc) : - m_sum_slot(sum), - m_return_slot(return_slot), - m_allocator(alloc), - m_right_zombie(nullptr), - m_result(result_), - m_parent(parent), - m_wait_context(w_o) - { - __TBB_ASSERT( !m_return_slot, nullptr ); - } -private: - finish_scan* release_parent() { - call_itt_task_notify(releasing, m_parent); - if (m_parent) { - auto parent = m_parent; - m_parent = nullptr; - if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { - return parent; - } - } - else - m_wait_context.release(); - return nullptr; - } - finish_scan* finalize(const execution_data& ed) { - finish_scan* next_task = release_parent(); - m_allocator.delete_object<finish_scan>(this, ed); - return next_task; - } -}; - -//! Initial task to split the work -/** @ingroup algorithms */ -template<typename Range, typename Body, typename Partitioner> -struct start_scan : public task { -private: - using sum_node_type = sum_node<Range,Body>; - using final_sum_type = final_sum<Range,Body>; - using finish_pass1_type = finish_scan<Range,Body>; - std::reference_wrapper<sum_node_type*> m_return_slot; - Range m_range; - std::reference_wrapper<final_sum_type> m_body; - typename Partitioner::partition_type m_partition; - /** Non-null if caller is requesting total. 
*/ - final_sum_type** m_sum_slot; - bool m_is_final; - bool m_is_right_child; - - finish_pass1_type* m_parent; - small_object_allocator m_allocator; - wait_context& m_wait_context; - - finish_pass1_type* release_parent() { - call_itt_task_notify(releasing, m_parent); - if (m_parent) { - auto parent = m_parent; - m_parent = nullptr; - if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { - return parent; - } - } - else - m_wait_context.release(); - return nullptr; - } - - finish_pass1_type* finalize( const execution_data& ed ) { - finish_pass1_type* next_task = release_parent(); - m_allocator.delete_object<start_scan>(this, ed); - return next_task; - } - -public: - task* execute( execution_data& ) override; - task* cancel( execution_data& ed ) override { - return finalize(ed); - } - start_scan( sum_node_type*& return_slot, start_scan& parent, small_object_allocator& alloc ) : - m_return_slot(return_slot), - m_range(parent.m_range,split()), - m_body(parent.m_body), - m_partition(parent.m_partition,split()), - m_sum_slot(parent.m_sum_slot), - m_is_final(parent.m_is_final), - m_is_right_child(true), - m_parent(parent.m_parent), - m_allocator(alloc), - m_wait_context(parent.m_wait_context) - { - __TBB_ASSERT( !m_return_slot, nullptr ); - parent.m_is_right_child = false; - } - - start_scan( sum_node_type*& return_slot, const Range& range, final_sum_type& body, const Partitioner& partitioner, wait_context& w_o, small_object_allocator& alloc ) : - m_return_slot(return_slot), - m_range(range), - m_body(body), - m_partition(partitioner), - m_sum_slot(nullptr), - m_is_final(true), - m_is_right_child(false), - m_parent(nullptr), - m_allocator(alloc), - m_wait_context(w_o) - { - __TBB_ASSERT( !m_return_slot, nullptr ); - } - - static void run( const Range& range, Body& body, const Partitioner& partitioner ) { - if( !range.empty() ) { - task_group_context context(PARALLEL_SCAN); - - using start_pass1_type = start_scan<Range,Body,Partitioner>; - sum_node_type* root = nullptr; - wait_context w_ctx{1}; - small_object_allocator alloc{}; - - auto& temp_body = *alloc.new_object<final_sum_type>(body, w_ctx, alloc); - temp_body.reverse_join(body); - - auto& pass1 = *alloc.new_object<start_pass1_type>(/*m_return_slot=*/root, range, temp_body, partitioner, w_ctx, alloc); - - execute_and_wait(pass1, context, w_ctx, context); - if( root ) { - root->prepare_for_execution(temp_body, nullptr, &body); - w_ctx.reserve(); - execute_and_wait(*root, context, w_ctx, context); - } else { - temp_body.assign_to(body); - temp_body.finish_construction(nullptr, range, nullptr); - alloc.delete_object<final_sum_type>(&temp_body); - } - } - } -}; - -template<typename Range, typename Body, typename Partitioner> -task* start_scan<Range,Body,Partitioner>::execute( execution_data& ed ) { - // Inspecting m_parent->result.left_sum would ordinarily be a race condition. - // But we inspect it only if we are not a stolen task, in which case we - // know that task assigning to m_parent->result.left_sum has completed. 
- __TBB_ASSERT(!m_is_right_child || m_parent, "right child is never an orphan"); - bool treat_as_stolen = m_is_right_child && (is_stolen(ed) || &m_body.get()!=m_parent->m_result.m_left_sum); - if( treat_as_stolen ) { - // Invocation is for right child that has been really stolen or needs to be virtually stolen - small_object_allocator alloc{}; - m_parent->m_right_zombie = alloc.new_object<final_sum_type>(m_body, alloc); - m_body = *m_parent->m_right_zombie; - m_is_final = false; - } - task* next_task = nullptr; - if( (m_is_right_child && !treat_as_stolen) || !m_range.is_divisible() || m_partition.should_execute_range(ed) ) { - if( m_is_final ) - m_body(m_range, final_scan_tag()); - else if( m_sum_slot ) - m_body(m_range, pre_scan_tag()); - if( m_sum_slot ) - *m_sum_slot = &m_body.get(); - __TBB_ASSERT( !m_return_slot, nullptr ); - - next_task = finalize(ed); - } else { - small_object_allocator alloc{}; - auto result = alloc.new_object<sum_node_type>(m_range,/*m_left_is_final=*/m_is_final, m_parent? &m_parent->m_result: nullptr, m_wait_context, alloc); - - auto new_parent = alloc.new_object<finish_pass1_type>(m_return_slot, m_sum_slot, *result, m_parent, m_wait_context, alloc); - m_parent = new_parent; - - // Split off right child - auto& right_child = *alloc.new_object<start_scan>(/*m_return_slot=*/result->m_right, *this, alloc); - - spawn(right_child, *ed.context); - - m_sum_slot = &result->m_left_sum; - m_return_slot = result->m_left; - - __TBB_ASSERT( !m_return_slot, nullptr ); - next_task = this; - } - return next_task; -} - -template<typename Range, typename Value, typename Scan, typename ReverseJoin> -class lambda_scan_body { - Value m_sum_slot; - const Value& identity_element; - const Scan& m_scan; - const ReverseJoin& m_reverse_join; -public: - void operator=(const lambda_scan_body&) = delete; - lambda_scan_body(const lambda_scan_body&) = default; - - lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join ) - : m_sum_slot(identity) - , identity_element(identity) - , m_scan(scan) - , m_reverse_join(rev_join) {} - - lambda_scan_body( lambda_scan_body& b, split ) - : m_sum_slot(b.identity_element) - , identity_element(b.identity_element) - , m_scan(b.m_scan) - , m_reverse_join(b.m_reverse_join) {} - - template<typename Tag> - void operator()( const Range& r, Tag tag ) { - m_sum_slot = m_scan(r, m_sum_slot, tag); - } - - void reverse_join( lambda_scan_body& a ) { - m_sum_slot = m_reverse_join(a.m_sum_slot, m_sum_slot); - } - - void assign( lambda_scan_body& b ) { - m_sum_slot = b.m_sum_slot; - } - - Value result() const { - return m_sum_slot; - } -}; - -// Requirements on Range concept are documented in blocked_range.h - -/** \page parallel_scan_body_req Requirements on parallel_scan body - Class \c Body implementing the concept of parallel_scan body must define: - - \code Body::Body( Body&, split ); \endcode Splitting constructor. 
- Split \c b so that \c this and \c b can accumulate separately - - \code Body::~Body(); \endcode Destructor - - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode - Preprocess iterations for range \c r - - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode - Do final processing for iterations of range \c r - - \code void Body::reverse_join( Body& a ); \endcode - Merge preprocessing state of \c a into \c this, where \c a was - created earlier from \c b by b's splitting constructor -**/ - -/** \name parallel_scan - See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/ -//@{ - -//! Parallel prefix with default partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_scan( const Range& range, Body& body ) { - start_scan<Range, Body, auto_partitioner>::run(range,body,__TBB_DEFAULT_PARTITIONER()); -} - -//! Parallel prefix with simple_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) { - start_scan<Range, Body, simple_partitioner>::run(range, body, partitioner); -} - -//! Parallel prefix with auto_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) { - start_scan<Range,Body,auto_partitioner>::run(range, body, partitioner); -} - -//! Parallel prefix with default partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename Scan, typename ReverseJoin> -Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) { - lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); - parallel_scan(range, body, __TBB_DEFAULT_PARTITIONER()); - return body.result(); -} - -//! Parallel prefix with simple_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename Scan, typename ReverseJoin> -Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, - const simple_partitioner& partitioner ) { - lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); - parallel_scan(range, body, partitioner); - return body.result(); -} - -//! Parallel prefix with auto_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename Scan, typename ReverseJoin> -Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, - const auto_partitioner& partitioner ) { - lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); - parallel_scan(range, body, partitioner); - return body.result(); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::parallel_scan; - using detail::d1::pre_scan_tag; - using detail::d1::final_scan_tag; - -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_parallel_scan_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_scan_H +#define __TBB_parallel_scan_H + +#include <functional> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" + +#include "profiling.h" +#include "partitioner.h" +#include "blocked_range.h" +#include "task_group.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! Used to indicate that the initial scan is being performed. +/** @ingroup algorithms */ +struct pre_scan_tag { + static bool is_final_scan() {return false;} + operator bool() {return is_final_scan();} +}; + +//! Used to indicate that the final scan is being performed. +/** @ingroup algorithms */ +struct final_scan_tag { + static bool is_final_scan() {return true;} + operator bool() {return is_final_scan();} +}; + +template<typename Range, typename Body> +struct sum_node; + +//! Performs final scan for a leaf +/** @ingroup algorithms */ +template<typename Range, typename Body> +struct final_sum : public task { +private: + using sum_node_type = sum_node<Range, Body>; + Body m_body; + aligned_space<Range> m_range; + //! Where to put result of last subrange, or nullptr if not last subrange. + Body* m_stuff_last; + + wait_context& m_wait_context; + sum_node_type* m_parent = nullptr; +public: + small_object_allocator m_allocator; + final_sum( Body& body, wait_context& w_o, small_object_allocator& alloc ) : + m_body(body, split()), m_wait_context(w_o), m_allocator(alloc) { + poison_pointer(m_stuff_last); + } + + final_sum( final_sum& sum, small_object_allocator& alloc ) : + m_body(sum.m_body, split()), m_wait_context(sum.m_wait_context), m_allocator(alloc) { + poison_pointer(m_stuff_last); + } + + ~final_sum() { + m_range.begin()->~Range(); + } + void finish_construction( sum_node_type* parent, const Range& range, Body* stuff_last ) { + __TBB_ASSERT( m_parent == nullptr, nullptr ); + m_parent = parent; + new( m_range.begin() ) Range(range); + m_stuff_last = stuff_last; + } +private: + sum_node_type* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + sum_node_type* finalize(const execution_data& ed){ + sum_node_type* next_task = release_parent(); + m_allocator.delete_object<final_sum>(this, ed); + return next_task; + } + +public: + task* execute(execution_data& ed) override { + m_body( *m_range.begin(), final_scan_tag() ); + if( m_stuff_last ) + m_stuff_last->assign(m_body); + + return finalize(ed); + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + template<typename Tag> + void operator()( const Range& r, Tag tag ) { + m_body( r, tag ); + } + void reverse_join( final_sum& a ) { + m_body.reverse_join(a.m_body); + } + void reverse_join( Body& body ) { + m_body.reverse_join(body); + } + void assign_to( Body& body ) { + body.assign(m_body); + } + void self_destroy(const execution_data& ed) { + m_allocator.delete_object<final_sum>(this, ed); + } +}; + +//! 
Split work to be done in the scan. +/** @ingroup algorithms */ +template<typename Range, typename Body> +struct sum_node : public task { +private: + using final_sum_type = final_sum<Range,Body>; +public: + final_sum_type *m_incoming; + final_sum_type *m_body; + Body *m_stuff_last; +private: + final_sum_type *m_left_sum; + sum_node *m_left; + sum_node *m_right; + bool m_left_is_final; + Range m_range; + wait_context& m_wait_context; + sum_node* m_parent; + small_object_allocator m_allocator; +public: + std::atomic<unsigned int> ref_count{0}; + sum_node( const Range range, bool left_is_final_, sum_node* parent, wait_context& w_o, small_object_allocator& alloc ) : + m_stuff_last(nullptr), + m_left_sum(nullptr), + m_left(nullptr), + m_right(nullptr), + m_left_is_final(left_is_final_), + m_range(range), + m_wait_context(w_o), + m_parent(parent), + m_allocator(alloc) + { + if( m_parent ) + m_parent->ref_count.fetch_add(1, std::memory_order_relaxed); + // Poison fields that will be set by second pass. + poison_pointer(m_body); + poison_pointer(m_incoming); + } + + ~sum_node() { + if (m_parent) + m_parent->ref_count.fetch_sub(1, std::memory_order_relaxed); + } +private: + sum_node* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + task* create_child( const Range& range, final_sum_type& body, sum_node* child, final_sum_type* incoming, Body* stuff_last ) { + if( child ) { + __TBB_ASSERT( is_poisoned(child->m_body) && is_poisoned(child->m_incoming), nullptr ); + child->prepare_for_execution(body, incoming, stuff_last); + return child; + } else { + body.finish_construction(this, range, stuff_last); + return &body; + } + } + + sum_node* finalize(const execution_data& ed) { + sum_node* next_task = release_parent(); + m_allocator.delete_object<sum_node>(this, ed); + return next_task; + } + +public: + void prepare_for_execution(final_sum_type& body, final_sum_type* incoming, Body *stuff_last) { + this->m_body = &body; + this->m_incoming = incoming; + this->m_stuff_last = stuff_last; + } + task* execute(execution_data& ed) override { + if( m_body ) { + if( m_incoming ) + m_left_sum->reverse_join( *m_incoming ); + task* right_child = this->create_child(Range(m_range,split()), *m_left_sum, m_right, m_left_sum, m_stuff_last); + task* left_child = m_left_is_final ? nullptr : this->create_child(m_range, *m_body, m_left, m_incoming, nullptr); + ref_count = (left_child != nullptr) + (right_child != nullptr); + m_body = nullptr; + if( left_child ) { + spawn(*right_child, *ed.context); + return left_child; + } else { + return right_child; + } + } else { + return finalize(ed); + } + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + void self_destroy(const execution_data& ed) { + m_allocator.delete_object<sum_node>(this, ed); + } + template<typename range,typename body,typename partitioner> + friend struct start_scan; + + template<typename range,typename body> + friend struct finish_scan; +}; + +//! 
Combine partial results +/** @ingroup algorithms */ +template<typename Range, typename Body> +struct finish_scan : public task { +private: + using sum_node_type = sum_node<Range,Body>; + using final_sum_type = final_sum<Range,Body>; + final_sum_type** const m_sum_slot; + sum_node_type*& m_return_slot; + small_object_allocator m_allocator; +public: + final_sum_type* m_right_zombie; + sum_node_type& m_result; + std::atomic<unsigned int> ref_count{2}; + finish_scan* m_parent; + wait_context& m_wait_context; + task* execute(execution_data& ed) override { + __TBB_ASSERT( m_result.ref_count.load() == static_cast<unsigned int>((m_result.m_left!=nullptr)+(m_result.m_right!=nullptr)), nullptr ); + if( m_result.m_left ) + m_result.m_left_is_final = false; + if( m_right_zombie && m_sum_slot ) + (*m_sum_slot)->reverse_join(*m_result.m_left_sum); + __TBB_ASSERT( !m_return_slot, nullptr ); + if( m_right_zombie || m_result.m_right ) { + m_return_slot = &m_result; + } else { + m_result.self_destroy(ed); + } + if( m_right_zombie && !m_sum_slot && !m_result.m_right ) { + m_right_zombie->self_destroy(ed); + m_right_zombie = nullptr; + } + return finalize(ed); + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + finish_scan(sum_node_type*& return_slot, final_sum_type** sum, sum_node_type& result_, finish_scan* parent, wait_context& w_o, small_object_allocator& alloc) : + m_sum_slot(sum), + m_return_slot(return_slot), + m_allocator(alloc), + m_right_zombie(nullptr), + m_result(result_), + m_parent(parent), + m_wait_context(w_o) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + } +private: + finish_scan* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + finish_scan* finalize(const execution_data& ed) { + finish_scan* next_task = release_parent(); + m_allocator.delete_object<finish_scan>(this, ed); + return next_task; + } +}; + +//! Initial task to split the work +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_scan : public task { +private: + using sum_node_type = sum_node<Range,Body>; + using final_sum_type = final_sum<Range,Body>; + using finish_pass1_type = finish_scan<Range,Body>; + std::reference_wrapper<sum_node_type*> m_return_slot; + Range m_range; + std::reference_wrapper<final_sum_type> m_body; + typename Partitioner::partition_type m_partition; + /** Non-null if caller is requesting total. 
*/ + final_sum_type** m_sum_slot; + bool m_is_final; + bool m_is_right_child; + + finish_pass1_type* m_parent; + small_object_allocator m_allocator; + wait_context& m_wait_context; + + finish_pass1_type* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + + finish_pass1_type* finalize( const execution_data& ed ) { + finish_pass1_type* next_task = release_parent(); + m_allocator.delete_object<start_scan>(this, ed); + return next_task; + } + +public: + task* execute( execution_data& ) override; + task* cancel( execution_data& ed ) override { + return finalize(ed); + } + start_scan( sum_node_type*& return_slot, start_scan& parent, small_object_allocator& alloc ) : + m_return_slot(return_slot), + m_range(parent.m_range,split()), + m_body(parent.m_body), + m_partition(parent.m_partition,split()), + m_sum_slot(parent.m_sum_slot), + m_is_final(parent.m_is_final), + m_is_right_child(true), + m_parent(parent.m_parent), + m_allocator(alloc), + m_wait_context(parent.m_wait_context) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + parent.m_is_right_child = false; + } + + start_scan( sum_node_type*& return_slot, const Range& range, final_sum_type& body, const Partitioner& partitioner, wait_context& w_o, small_object_allocator& alloc ) : + m_return_slot(return_slot), + m_range(range), + m_body(body), + m_partition(partitioner), + m_sum_slot(nullptr), + m_is_final(true), + m_is_right_child(false), + m_parent(nullptr), + m_allocator(alloc), + m_wait_context(w_o) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + } + + static void run( const Range& range, Body& body, const Partitioner& partitioner ) { + if( !range.empty() ) { + task_group_context context(PARALLEL_SCAN); + + using start_pass1_type = start_scan<Range,Body,Partitioner>; + sum_node_type* root = nullptr; + wait_context w_ctx{1}; + small_object_allocator alloc{}; + + auto& temp_body = *alloc.new_object<final_sum_type>(body, w_ctx, alloc); + temp_body.reverse_join(body); + + auto& pass1 = *alloc.new_object<start_pass1_type>(/*m_return_slot=*/root, range, temp_body, partitioner, w_ctx, alloc); + + execute_and_wait(pass1, context, w_ctx, context); + if( root ) { + root->prepare_for_execution(temp_body, nullptr, &body); + w_ctx.reserve(); + execute_and_wait(*root, context, w_ctx, context); + } else { + temp_body.assign_to(body); + temp_body.finish_construction(nullptr, range, nullptr); + alloc.delete_object<final_sum_type>(&temp_body); + } + } + } +}; + +template<typename Range, typename Body, typename Partitioner> +task* start_scan<Range,Body,Partitioner>::execute( execution_data& ed ) { + // Inspecting m_parent->result.left_sum would ordinarily be a race condition. + // But we inspect it only if we are not a stolen task, in which case we + // know that task assigning to m_parent->result.left_sum has completed. 
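+    // Editorial note (not part of the original header): the check below treats a right
+    // child as "virtually stolen" when it was really stolen or when its body is no
+    // longer recorded as the parent's left sum; such a child gets a fresh final_sum
+    // copy (m_right_zombie) and downgrades from a final scan to a pre-scan.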
+ __TBB_ASSERT(!m_is_right_child || m_parent, "right child is never an orphan"); + bool treat_as_stolen = m_is_right_child && (is_stolen(ed) || &m_body.get()!=m_parent->m_result.m_left_sum); + if( treat_as_stolen ) { + // Invocation is for right child that has been really stolen or needs to be virtually stolen + small_object_allocator alloc{}; + m_parent->m_right_zombie = alloc.new_object<final_sum_type>(m_body, alloc); + m_body = *m_parent->m_right_zombie; + m_is_final = false; + } + task* next_task = nullptr; + if( (m_is_right_child && !treat_as_stolen) || !m_range.is_divisible() || m_partition.should_execute_range(ed) ) { + if( m_is_final ) + m_body(m_range, final_scan_tag()); + else if( m_sum_slot ) + m_body(m_range, pre_scan_tag()); + if( m_sum_slot ) + *m_sum_slot = &m_body.get(); + __TBB_ASSERT( !m_return_slot, nullptr ); + + next_task = finalize(ed); + } else { + small_object_allocator alloc{}; + auto result = alloc.new_object<sum_node_type>(m_range,/*m_left_is_final=*/m_is_final, m_parent? &m_parent->m_result: nullptr, m_wait_context, alloc); + + auto new_parent = alloc.new_object<finish_pass1_type>(m_return_slot, m_sum_slot, *result, m_parent, m_wait_context, alloc); + m_parent = new_parent; + + // Split off right child + auto& right_child = *alloc.new_object<start_scan>(/*m_return_slot=*/result->m_right, *this, alloc); + + spawn(right_child, *ed.context); + + m_sum_slot = &result->m_left_sum; + m_return_slot = result->m_left; + + __TBB_ASSERT( !m_return_slot, nullptr ); + next_task = this; + } + return next_task; +} + +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +class lambda_scan_body { + Value m_sum_slot; + const Value& identity_element; + const Scan& m_scan; + const ReverseJoin& m_reverse_join; +public: + void operator=(const lambda_scan_body&) = delete; + lambda_scan_body(const lambda_scan_body&) = default; + + lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join ) + : m_sum_slot(identity) + , identity_element(identity) + , m_scan(scan) + , m_reverse_join(rev_join) {} + + lambda_scan_body( lambda_scan_body& b, split ) + : m_sum_slot(b.identity_element) + , identity_element(b.identity_element) + , m_scan(b.m_scan) + , m_reverse_join(b.m_reverse_join) {} + + template<typename Tag> + void operator()( const Range& r, Tag tag ) { + m_sum_slot = m_scan(r, m_sum_slot, tag); + } + + void reverse_join( lambda_scan_body& a ) { + m_sum_slot = m_reverse_join(a.m_sum_slot, m_sum_slot); + } + + void assign( lambda_scan_body& b ) { + m_sum_slot = b.m_sum_slot; + } + + Value result() const { + return m_sum_slot; + } +}; + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_scan_body_req Requirements on parallel_scan body + Class \c Body implementing the concept of parallel_scan body must define: + - \code Body::Body( Body&, split ); \endcode Splitting constructor. 
+ Split \c b so that \c this and \c b can accumulate separately + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode + Preprocess iterations for range \c r + - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode + Do final processing for iterations of range \c r + - \code void Body::reverse_join( Body& a ); \endcode + Merge preprocessing state of \c a into \c this, where \c a was + created earlier from \c b by b's splitting constructor +**/ + +/** \name parallel_scan + See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/ +//@{ + +//! Parallel prefix with default partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body ) { + start_scan<Range, Body, auto_partitioner>::run(range,body,__TBB_DEFAULT_PARTITIONER()); +} + +//! Parallel prefix with simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_scan<Range, Body, simple_partitioner>::run(range, body, partitioner); +} + +//! Parallel prefix with auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) { + start_scan<Range,Body,auto_partitioner>::run(range, body, partitioner); +} + +//! Parallel prefix with default partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, __TBB_DEFAULT_PARTITIONER()); + return body.result(); +} + +//! Parallel prefix with simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, + const simple_partitioner& partitioner ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, partitioner); + return body.result(); +} + +//! 
Parallel prefix with auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, + const auto_partitioner& partitioner ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, partitioner); + return body.result(); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::parallel_scan; + using detail::d1::pre_scan_tag; + using detail::d1::final_scan_tag; + +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_scan_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h index 0e7be5e25b..eaaa89707a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h @@ -1,247 +1,247 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_sort_H -#define __TBB_parallel_sort_H - -#include "detail/_namespace_injection.h" -#include "parallel_for.h" -#include "blocked_range.h" -#include "profiling.h" - -#include <algorithm> -#include <iterator> -#include <functional> -#include <cstddef> - -namespace tbb { -namespace detail { -namespace d1 { - -//! Range used in quicksort to split elements into subranges based on a value. -/** The split operation selects a splitter and places all elements less than or equal - to the value in the first range and the remaining elements in the second range. - @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -class quick_sort_range { - std::size_t median_of_three( const RandomAccessIterator& array, std::size_t l, std::size_t m, std::size_t r ) const { - return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp(array[l], array[r]) ? r : l ) ) - : ( comp(array[r], array[m]) ? m : ( comp(array[r], array[l]) ? r : l ) ); - } - - std::size_t pseudo_median_of_nine( const RandomAccessIterator& array, const quick_sort_range& range ) const { - std::size_t offset = range.size / 8u; - return median_of_three(array, - median_of_three(array, 0 , offset, offset * 2), - median_of_three(array, offset * 3, offset * 4, offset * 5), - median_of_three(array, offset * 6, offset * 7, range.size - 1)); - - } - - std::size_t split_range( quick_sort_range& range ) { - RandomAccessIterator array = range.begin; - RandomAccessIterator first_element = range.begin; - std::size_t m = pseudo_median_of_nine(array, range); - if( m != 0 ) std::iter_swap(array, array + m); - - std::size_t i = 0; - std::size_t j = range.size; - // Partition interval [i + 1,j - 1] with key *first_element. - for(;;) { - __TBB_ASSERT( i < j, nullptr ); - // Loop must terminate since array[l] == *first_element. - do { - --j; - __TBB_ASSERT( i <= j, "bad ordering relation?" 
); - } while( comp(*first_element, array[j]) ); - do { - __TBB_ASSERT( i <= j, nullptr ); - if( i == j ) goto partition; - ++i; - } while( comp(array[i], *first_element) ); - if( i == j ) goto partition; - std::iter_swap(array + i, array + j); - } -partition: - // Put the partition key were it belongs - std::iter_swap(array + j, first_element); - // array[l..j) is less or equal to key. - // array(j..r) is greater or equal to key. - // array[j] is equal to key - i = j + 1; - std::size_t new_range_size = range.size - i; - range.size = j; - return new_range_size; - } - -public: - quick_sort_range() = default; - quick_sort_range( const quick_sort_range& ) = default; - void operator=( const quick_sort_range& ) = delete; - - static constexpr std::size_t grainsize = 500; - const Compare& comp; - std::size_t size; - RandomAccessIterator begin; - - quick_sort_range( RandomAccessIterator begin_, std::size_t size_, const Compare& comp_ ) : - comp(comp_), size(size_), begin(begin_) {} - - bool empty() const { return size == 0; } - bool is_divisible() const { return size >= grainsize; } - - quick_sort_range( quick_sort_range& range, split ) - : comp(range.comp) - , size(split_range(range)) - // +1 accounts for the pivot element, which is at its correct place - // already and, therefore, is not included into subranges. - , begin(range.begin + range.size + 1) {} -}; - -//! Body class used to test if elements in a range are presorted -/** @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -class quick_sort_pretest_body { - const Compare& comp; - task_group_context& context; - -public: - quick_sort_pretest_body() = default; - quick_sort_pretest_body( const quick_sort_pretest_body& ) = default; - void operator=( const quick_sort_pretest_body& ) = delete; - - quick_sort_pretest_body( const Compare& _comp, task_group_context& _context ) : comp(_comp), context(_context) {} - - void operator()( const blocked_range<RandomAccessIterator>& range ) const { - RandomAccessIterator my_end = range.end(); - - int i = 0; - //TODO: consider using std::is_sorted() for each 64 iterations (requires performance measurements) - for( RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i ) { - if( i % 64 == 0 && context.is_group_execution_cancelled() ) break; - - // The k - 1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1 - if( comp(*(k), *(k - 1)) ) { - context.cancel_group_execution(); - break; - } - } - } -}; - -//! Body class used to sort elements in a range that is smaller than the grainsize. -/** @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -struct quick_sort_body { - void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const { - std::sort(range.begin, range.begin + range.size, range.comp); - } -}; - -//! Method to perform parallel_for based quick sort. -/** @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -void do_parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { - parallel_for(quick_sort_range<RandomAccessIterator,Compare>(begin, end - begin, comp), - quick_sort_body<RandomAccessIterator,Compare>(), - auto_partitioner()); -} - -//! Wrapper method to initiate the sort by calling parallel_for. 
-/** @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { - task_group_context my_context(PARALLEL_SORT); - constexpr int serial_cutoff = 9; - - __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" ); - RandomAccessIterator k = begin; - for( ; k != begin + serial_cutoff; ++k ) { - if( comp(*(k + 1), *k) ) { - do_parallel_quick_sort(begin, end, comp); - } - } - - // Check is input range already sorted - parallel_for(blocked_range<RandomAccessIterator>(k + 1, end), - quick_sort_pretest_body<RandomAccessIterator, Compare>(comp, my_context), - auto_partitioner(), - my_context); - - if( my_context.is_group_execution_cancelled() ) - do_parallel_quick_sort(begin, end, comp); -} - -/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort - Requirements on the iterator type \c It and its value type \c T for \c parallel_sort: - - - \code void iter_swap( It a, It b ) \endcode Swaps the values of the elements the given - iterators \c a and \c b are pointing to. \c It should be a random access iterator. - - - \code bool Compare::operator()( const T& x, const T& y ) \endcode True if x comes before y; -**/ - -/** \name parallel_sort - See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/ -//@{ - -//! Sorts the data in [begin,end) using the given comparator -/** The compare function object is used for all comparisons between elements during sorting. - The compare object must define a bool operator() function. - @ingroup algorithms **/ -template<typename RandomAccessIterator, typename Compare> -void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { - constexpr int min_parallel_size = 500; - if( end > begin ) { - if( end - begin < min_parallel_size ) { - std::sort(begin, end, comp); - } else { - parallel_quick_sort(begin, end, comp); - } - } -} - -//! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator> -/** @ingroup algorithms **/ -template<typename RandomAccessIterator> -void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) { - parallel_sort(begin, end, std::less<typename std::iterator_traits<RandomAccessIterator>::value_type>()); -} - -//! Sorts the data in rng using the given comparator -/** @ingroup algorithms **/ -template<typename Range, typename Compare> -void parallel_sort( Range& rng, const Compare& comp ) { - parallel_sort(std::begin(rng), std::end(rng), comp); -} - -//! Sorts the data in rng with a default comparator \c std::less<RandomAccessIterator> -/** @ingroup algorithms **/ -template<typename Range> -void parallel_sort( Range& rng ) { - parallel_sort(std::begin(rng), std::end(rng)); -} -//@} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::parallel_sort; -} // namespace v1 -} // namespace tbb - -#endif /*__TBB_parallel_sort_H*/ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_sort_H +#define __TBB_parallel_sort_H + +#include "detail/_namespace_injection.h" +#include "parallel_for.h" +#include "blocked_range.h" +#include "profiling.h" + +#include <algorithm> +#include <iterator> +#include <functional> +#include <cstddef> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Range used in quicksort to split elements into subranges based on a value. +/** The split operation selects a splitter and places all elements less than or equal + to the value in the first range and the remaining elements in the second range. + @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +class quick_sort_range { + std::size_t median_of_three( const RandomAccessIterator& array, std::size_t l, std::size_t m, std::size_t r ) const { + return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp(array[l], array[r]) ? r : l ) ) + : ( comp(array[r], array[m]) ? m : ( comp(array[r], array[l]) ? r : l ) ); + } + + std::size_t pseudo_median_of_nine( const RandomAccessIterator& array, const quick_sort_range& range ) const { + std::size_t offset = range.size / 8u; + return median_of_three(array, + median_of_three(array, 0 , offset, offset * 2), + median_of_three(array, offset * 3, offset * 4, offset * 5), + median_of_three(array, offset * 6, offset * 7, range.size - 1)); + + } + + std::size_t split_range( quick_sort_range& range ) { + RandomAccessIterator array = range.begin; + RandomAccessIterator first_element = range.begin; + std::size_t m = pseudo_median_of_nine(array, range); + if( m != 0 ) std::iter_swap(array, array + m); + + std::size_t i = 0; + std::size_t j = range.size; + // Partition interval [i + 1,j - 1] with key *first_element. + for(;;) { + __TBB_ASSERT( i < j, nullptr ); + // Loop must terminate since array[l] == *first_element. + do { + --j; + __TBB_ASSERT( i <= j, "bad ordering relation?" ); + } while( comp(*first_element, array[j]) ); + do { + __TBB_ASSERT( i <= j, nullptr ); + if( i == j ) goto partition; + ++i; + } while( comp(array[i], *first_element) ); + if( i == j ) goto partition; + std::iter_swap(array + i, array + j); + } +partition: + // Put the partition key were it belongs + std::iter_swap(array + j, first_element); + // array[l..j) is less or equal to key. + // array(j..r) is greater or equal to key. + // array[j] is equal to key + i = j + 1; + std::size_t new_range_size = range.size - i; + range.size = j; + return new_range_size; + } + +public: + quick_sort_range() = default; + quick_sort_range( const quick_sort_range& ) = default; + void operator=( const quick_sort_range& ) = delete; + + static constexpr std::size_t grainsize = 500; + const Compare& comp; + std::size_t size; + RandomAccessIterator begin; + + quick_sort_range( RandomAccessIterator begin_, std::size_t size_, const Compare& comp_ ) : + comp(comp_), size(size_), begin(begin_) {} + + bool empty() const { return size == 0; } + bool is_divisible() const { return size >= grainsize; } + + quick_sort_range( quick_sort_range& range, split ) + : comp(range.comp) + , size(split_range(range)) + // +1 accounts for the pivot element, which is at its correct place + // already and, therefore, is not included into subranges. + , begin(range.begin + range.size + 1) {} +}; + +//! 
Body class used to test if elements in a range are presorted +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +class quick_sort_pretest_body { + const Compare& comp; + task_group_context& context; + +public: + quick_sort_pretest_body() = default; + quick_sort_pretest_body( const quick_sort_pretest_body& ) = default; + void operator=( const quick_sort_pretest_body& ) = delete; + + quick_sort_pretest_body( const Compare& _comp, task_group_context& _context ) : comp(_comp), context(_context) {} + + void operator()( const blocked_range<RandomAccessIterator>& range ) const { + RandomAccessIterator my_end = range.end(); + + int i = 0; + //TODO: consider using std::is_sorted() for each 64 iterations (requires performance measurements) + for( RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i ) { + if( i % 64 == 0 && context.is_group_execution_cancelled() ) break; + + // The k - 1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1 + if( comp(*(k), *(k - 1)) ) { + context.cancel_group_execution(); + break; + } + } + } +}; + +//! Body class used to sort elements in a range that is smaller than the grainsize. +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +struct quick_sort_body { + void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const { + std::sort(range.begin, range.begin + range.size, range.comp); + } +}; + +//! Method to perform parallel_for based quick sort. +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +void do_parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + parallel_for(quick_sort_range<RandomAccessIterator,Compare>(begin, end - begin, comp), + quick_sort_body<RandomAccessIterator,Compare>(), + auto_partitioner()); +} + +//! Wrapper method to initiate the sort by calling parallel_for. +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + task_group_context my_context(PARALLEL_SORT); + constexpr int serial_cutoff = 9; + + __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" ); + RandomAccessIterator k = begin; + for( ; k != begin + serial_cutoff; ++k ) { + if( comp(*(k + 1), *k) ) { + do_parallel_quick_sort(begin, end, comp); + } + } + + // Check is input range already sorted + parallel_for(blocked_range<RandomAccessIterator>(k + 1, end), + quick_sort_pretest_body<RandomAccessIterator, Compare>(comp, my_context), + auto_partitioner(), + my_context); + + if( my_context.is_group_execution_cancelled() ) + do_parallel_quick_sort(begin, end, comp); +} + +/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort + Requirements on the iterator type \c It and its value type \c T for \c parallel_sort: + + - \code void iter_swap( It a, It b ) \endcode Swaps the values of the elements the given + iterators \c a and \c b are pointing to. \c It should be a random access iterator. + + - \code bool Compare::operator()( const T& x, const T& y ) \endcode True if x comes before y; +**/ + +/** \name parallel_sort + See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/ +//@{ + +//! Sorts the data in [begin,end) using the given comparator +/** The compare function object is used for all comparisons between elements during sorting. 
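+    Illustrative sketch only (editorial addition; the vector \c v is hypothetical and
+    assumes the usual standard-library includes):
+    \code
+    std::vector<int> v{3, 1, 4, 1, 5};
+    tbb::parallel_sort(v.begin(), v.end(), [](int x, int y) { return x > y; }); // descending order
+    \endcode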
+ The compare object must define a bool operator() function. + @ingroup algorithms **/ +template<typename RandomAccessIterator, typename Compare> +void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + constexpr int min_parallel_size = 500; + if( end > begin ) { + if( end - begin < min_parallel_size ) { + std::sort(begin, end, comp); + } else { + parallel_quick_sort(begin, end, comp); + } + } +} + +//! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator> +/** @ingroup algorithms **/ +template<typename RandomAccessIterator> +void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) { + parallel_sort(begin, end, std::less<typename std::iterator_traits<RandomAccessIterator>::value_type>()); +} + +//! Sorts the data in rng using the given comparator +/** @ingroup algorithms **/ +template<typename Range, typename Compare> +void parallel_sort( Range& rng, const Compare& comp ) { + parallel_sort(std::begin(rng), std::end(rng), comp); +} + +//! Sorts the data in rng with a default comparator \c std::less<RandomAccessIterator> +/** @ingroup algorithms **/ +template<typename Range> +void parallel_sort( Range& rng ) { + parallel_sort(std::begin(rng), std::end(rng)); +} +//@} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::parallel_sort; +} // namespace v1 +} // namespace tbb + +#endif /*__TBB_parallel_sort_H*/ diff --git a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h index 37ac0a09d9..bd1dc377d0 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h +++ b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h @@ -1,688 +1,688 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_partitioner_H -#define __TBB_partitioner_H - -#ifndef __TBB_INITIAL_CHUNKS -// initial task divisions per thread -#define __TBB_INITIAL_CHUNKS 2 -#endif -#ifndef __TBB_RANGE_POOL_CAPACITY -// maximum number of elements in range pool -#define __TBB_RANGE_POOL_CAPACITY 8 -#endif -#ifndef __TBB_INIT_DEPTH -// initial value for depth of range pool -#define __TBB_INIT_DEPTH 5 -#endif -#ifndef __TBB_DEMAND_DEPTH_ADD -// when imbalance is found range splits this value times more -#define __TBB_DEMAND_DEPTH_ADD 1 -#endif - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_aligned_space.h" -#include "detail/_utils.h" -#include "detail/_template_helpers.h" -#include "detail/_range_common.h" -#include "detail/_task.h" -#include "detail/_small_object_pool.h" - -#include "cache_aligned_allocator.h" -#include "task_group.h" // task_group_context -#include "task_arena.h" - -#include <algorithm> -#include <atomic> -#include <type_traits> - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Workaround for overzealous compiler warnings - #pragma warning (push) - #pragma warning (disable: 4244) -#endif - -namespace tbb { -namespace detail { - -namespace d1 { -class auto_partitioner; -class simple_partitioner; -class static_partitioner; -class affinity_partitioner; -class affinity_partition_type; -class affinity_partitioner_base; - -inline std::size_t get_initial_auto_partitioner_divisor() { - const std::size_t factor = 4; - return factor * max_concurrency(); -} - -//! Defines entry point for affinity partitioner into oneTBB run-time library. -class affinity_partitioner_base: no_copy { - friend class affinity_partitioner; - friend class affinity_partition_type; - //! Array that remembers affinities of tree positions to affinity_id. - /** NULL if my_size==0. */ - slot_id* my_array; - //! Number of elements in my_array. - std::size_t my_size; - //! Zeros the fields. - affinity_partitioner_base() : my_array(nullptr), my_size(0) {} - //! Deallocates my_array. - ~affinity_partitioner_base() { resize(0); } - //! Resize my_array. - /** Retains values if resulting size is the same. */ - void resize(unsigned factor) { - // Check factor to avoid asking for number of workers while there might be no arena. - unsigned max_threads_in_arena = max_concurrency(); - std::size_t new_size = factor ? factor * max_threads_in_arena : 0; - if (new_size != my_size) { - if (my_array) { - r1::cache_aligned_deallocate(my_array); - // Following two assignments must be done here for sake of exception safety. - my_array = nullptr; - my_size = 0; - } - if (new_size) { - my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id))); - std::fill_n(my_array, new_size, no_slot); - my_size = new_size; - } - } - } -}; - -template<typename Range, typename Body, typename Partitioner> struct start_for; -template<typename Range, typename Body, typename Partitioner> struct start_scan; -template<typename Range, typename Body, typename Partitioner> struct start_reduce; -template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce; - -struct node { - node* my_parent{}; - std::atomic<int> m_ref_count{}; - - node() = default; - node(node* parent, int ref_count) : - my_parent{parent}, m_ref_count{ref_count} { - __TBB_ASSERT(ref_count > 0, "The ref count must be positive"); - } -}; - -struct wait_node : node { - wait_node() : node{ nullptr, 1 } {} - wait_context m_wait{1}; -}; - -//! 
Join task node that contains shared flag for stealing feedback -struct tree_node : public node { - small_object_allocator m_allocator; - std::atomic<bool> m_child_stolen{false}; - - tree_node(node* parent, int ref_count, small_object_allocator& alloc) - : node{parent, ref_count} - , m_allocator{alloc} {} - - void join(task_group_context*) {/*dummy, required only for reduction algorithms*/}; - - template <typename Task> - static void mark_task_stolen(Task &t) { - std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen; -#if TBB_USE_PROFILING_TOOLS - // Threading tools respect lock prefix but report false-positive data-race via plain store - flag.exchange(true); -#else - flag.store(true, std::memory_order_relaxed); -#endif // TBB_USE_PROFILING_TOOLS - } - template <typename Task> - static bool is_peer_stolen(Task &t) { - return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed); - } -}; - -// Context used to check cancellation state during reduction join process -template<typename TreeNodeType> -void fold_tree(node* n, const execution_data& ed) { - for (;;) { - __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive."); - call_itt_task_notify(releasing, n); - if (--n->m_ref_count > 0) { - return; - } - node* parent = n->my_parent; - if (!parent) { - break; - }; - - call_itt_task_notify(acquired, n); - TreeNodeType* self = static_cast<TreeNodeType*>(n); - self->join(ed.context); - self->m_allocator.delete_object(self, ed); - n = parent; - } - // Finish parallel for execution when the root (last node) is reached - static_cast<wait_node*>(n)->m_wait.release(); -} - -//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows -//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented -//! by a number that cannot fit into machine word. -typedef unsigned char depth_t; - -//! Range pool stores ranges of type T in a circular buffer with MaxCapacity -template <typename T, depth_t MaxCapacity> -class range_vector { - depth_t my_head; - depth_t my_tail; - depth_t my_size; - depth_t my_depth[MaxCapacity]; // relative depths of stored ranges - tbb::detail::aligned_space<T, MaxCapacity> my_pool; - -public: - //! initialize via first range in pool - range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) { - my_depth[0] = 0; - new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move? - } - ~range_vector() { - while( !empty() ) pop_back(); - } - bool empty() const { return my_size == 0; } - depth_t size() const { return my_size; } - //! Populates range pool via ranges up to max depth or while divisible - //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces - void split_to_fill(depth_t max_depth) { - while( my_size < MaxCapacity && is_divisible(max_depth) ) { - depth_t prev = my_head; - my_head = (my_head + 1) % MaxCapacity; - new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move? 
- my_pool.begin()[prev].~T(); // instead of assignment - new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split - my_depth[my_head] = ++my_depth[prev]; - my_size++; - } - } - void pop_back() { - __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size"); - my_pool.begin()[my_head].~T(); - my_size--; - my_head = (my_head + MaxCapacity - 1) % MaxCapacity; - } - void pop_front() { - __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size"); - my_pool.begin()[my_tail].~T(); - my_size--; - my_tail = (my_tail + 1) % MaxCapacity; - } - T& back() { - __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size"); - return my_pool.begin()[my_head]; - } - T& front() { - __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size"); - return my_pool.begin()[my_tail]; - } - //! similarly to front(), returns depth of the first range in the pool - depth_t front_depth() { - __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size"); - return my_depth[my_tail]; - } - depth_t back_depth() { - __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size"); - return my_depth[my_head]; - } - bool is_divisible(depth_t max_depth) { - return back_depth() < max_depth && back().is_divisible(); - } -}; - -//! Provides default methods for partition objects and common algorithm blocks. -template <typename Partition> -struct partition_type_base { - typedef detail::split split_type; - // decision makers - void note_affinity( slot_id ) {} - template <typename Task> - bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range() - template <typename Range> split_type get_split() { return split(); } - Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper - - template<typename StartType, typename Range> - void work_balance(StartType &start, Range &range, const execution_data&) { - start.run_body( range ); // simple partitioner goes always here - } - - template<typename StartType, typename Range> - void execute(StartType &start, Range &range, execution_data& ed) { - // The algorithm in a few words ([]-denotes calls to decision methods of partitioner): - // [If this task is stolen, adjust depth and divisions if necessary, set flag]. - // If range is divisible { - // Spread the work while [initial divisions left]; - // Create trap task [if necessary]; - // } - // If not divisible or [max depth is reached], execute, else do the range pool part - if ( range.is_divisible() ) { - if ( self().is_divisible() ) { - do { // split until is divisible - typename Partition::split_type split_obj = self().template get_split<Range>(); - start.offer_work( split_obj, ed ); - } while ( range.is_divisible() && self().is_divisible() ); - } - } - self().work_balance(start, range, ed); - } -}; - -//! Provides default splitting strategy for partition objects. -template <typename Partition> -struct adaptive_mode : partition_type_base<Partition> { - typedef Partition my_partition; - std::size_t my_divisor; - // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves. - // A task which has only one index must produce the right split without reserved index in order to avoid - // it to be overwritten in note_affinity() of the created (right) task. - // I.e. 
a task created deeper than the affinity array can remember must not save its affinity (LIFO order) - static const unsigned factor = 1; - adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {} - adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {} - /*! Override do_split methods in order to specify splitting strategy */ - std::size_t do_split(adaptive_mode &src, split) { - return src.my_divisor /= 2u; - } -}; - -//! Helper type for checking availability of proportional_split constructor -template <typename T> using supports_proportional_splitting = typename std::is_constructible<T, T&, proportional_split&>; - -//! A helper class to create a proportional_split object for a given type of Range. -/** If the Range has proportional_split constructor, - then created object splits a provided value in an implemenation-defined proportion; - otherwise it represents equal-size split. */ -// TODO: check if this helper can be a nested class of proportional_mode. -template <typename Range, typename = void> -struct proportion_helper { - static proportional_split get_split(std::size_t) { return proportional_split(1,1); } -}; - -template <typename Range> -struct proportion_helper<Range, typename std::enable_if<supports_proportional_splitting<Range>::value>::type> { - static proportional_split get_split(std::size_t n) { - std::size_t right = n / 2; - std::size_t left = n - right; - return proportional_split(left, right); - } -}; - -//! Provides proportional splitting strategy for partition objects -template <typename Partition> -struct proportional_mode : adaptive_mode<Partition> { - typedef Partition my_partition; - using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes - - proportional_mode() : adaptive_mode<Partition>() {} - proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {} - proportional_mode(proportional_mode &src, const proportional_split& split_obj) { self().my_divisor = do_split(src, split_obj); } - std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) { - std::size_t portion = split_obj.right() * my_partition::factor; - portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor); - src.my_divisor -= portion; - return portion; - } - bool is_divisible() { // part of old should_execute_range() - return self().my_divisor > my_partition::factor; - } - template <typename Range> - proportional_split get_split() { - // Create a proportion for the number of threads expected to handle "this" subrange - return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor ); - } -}; - -static std::size_t get_initial_partition_head() { - int current_index = tbb::this_task_arena::current_thread_index(); - if (current_index == tbb::task_arena::not_initialized) - current_index = 0; - return size_t(current_index); -} - -//! 
Provides default linear indexing of partitioner's sequence -template <typename Partition> -struct linear_affinity_mode : proportional_mode<Partition> { - std::size_t my_head; - std::size_t my_max_affinity; - using proportional_mode<Partition>::self; - linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()), - my_max_affinity(self().my_divisor) {} - linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split()) - , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} - linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj) - , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} - void spawn_task(task& t, task_group_context& ctx) { - if (self().my_divisor) { - spawn(t, ctx, slot_id(my_head)); - } else { - spawn(t, ctx); - } - } -}; - -static bool is_stolen_task(const execution_data& ed) { - return execution_slot(ed) != original_slot(ed); -} - -/*! Determine work-balance phase implementing splitting & stealing actions */ -template<class Mode> -struct dynamic_grainsize_mode : Mode { - using Mode::self; - enum { - begin = 0, - run, - pass - } my_delay; - depth_t my_max_depth; - static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; - dynamic_grainsize_mode(): Mode() - , my_delay(begin) - , my_max_depth(__TBB_INIT_DEPTH) {} - dynamic_grainsize_mode(dynamic_grainsize_mode& p, split) - : Mode(p, split()) - , my_delay(pass) - , my_max_depth(p.my_max_depth) {} - dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj) - : Mode(p, split_obj) - , my_delay(begin) - , my_max_depth(p.my_max_depth) {} - template <typename Task> - bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range() - if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree - self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)? - if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task -#if __TBB_USE_OPTIONAL_RTTI - // RTTI is available, check whether the cast is valid - // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast<tree_node*>(t.m_parent), 0); - // correctness of the cast relies on avoiding the root task for which: - // - initial value of my_divisor != 0 (protected by separate assertion) - // - is_stolen_task() always returns false for the root task. 
-#endif - tree_node::mark_task_stolen(t); - if( !my_max_depth ) my_max_depth++; - my_max_depth += __TBB_DEMAND_DEPTH_ADD; - return true; - } - } - return false; - } - depth_t max_depth() { return my_max_depth; } - void align_depth(depth_t base) { - __TBB_ASSERT(base <= my_max_depth, 0); - my_max_depth -= base; - } - template<typename StartType, typename Range> - void work_balance(StartType &start, Range &range, execution_data& ed) { - if( !range.is_divisible() || !self().max_depth() ) { - start.run_body( range ); // simple partitioner goes always here - } - else { // do range pool - range_vector<Range, range_pool_size> range_pool(range); - do { - range_pool.split_to_fill(self().max_depth()); // fill range pool - if( self().check_for_demand( start ) ) { - if( range_pool.size() > 1 ) { - start.offer_work( range_pool.front(), range_pool.front_depth(), ed ); - range_pool.pop_front(); - continue; - } - if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task - continue; // note: next split_to_fill() should split range at least once - } - start.run_body( range_pool.back() ); - range_pool.pop_back(); - } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() ); - } - } - template <typename Task> - bool check_for_demand(Task& t) { - if ( pass == my_delay ) { - if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array - return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more - else if ( self().my_divisor && my_max_depth ) { // make balancing task - self().my_divisor = 0; // once for each task; depth will be decreased in align_depth() - return true; - } - else if ( tree_node::is_peer_stolen(t) ) { - my_max_depth += __TBB_DEMAND_DEPTH_ADD; - return true; - } - } else if( begin == my_delay ) { - my_delay = pass; - } - return false; - } -}; - -class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > { -public: - auto_partition_type( const auto_partitioner& ) - : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() { - my_divisor *= __TBB_INITIAL_CHUNKS; - } - auto_partition_type( auto_partition_type& src, split) - : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {} - bool is_divisible() { // part of old should_execute_range() - if( my_divisor > 1 ) return true; - if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead - // keep same fragmentation while splitting for the local task pool - my_max_depth--; - my_divisor = 0; // decrease max_depth once per task - return true; - } else return false; - } - template <typename Task> - bool check_for_demand(Task& t) { - if (tree_node::is_peer_stolen(t)) { - my_max_depth += __TBB_DEMAND_DEPTH_ADD; - return true; - } else return false; - } - void spawn_task(task& t, task_group_context& ctx) { - spawn(t, ctx); - } -}; - -class simple_partition_type: public partition_type_base<simple_partition_type> { -public: - simple_partition_type( const simple_partitioner& ) {} - simple_partition_type( const simple_partition_type&, split ) {} - //! 
simplified algorithm - template<typename StartType, typename Range> - void execute(StartType &start, Range &range, execution_data& ed) { - split_type split_obj = split(); // start.offer_work accepts split_type as reference - while( range.is_divisible() ) - start.offer_work( split_obj, ed ); - start.run_body( range ); - } - void spawn_task(task& t, task_group_context& ctx) { - spawn(t, ctx); - } -}; - -class static_partition_type : public linear_affinity_mode<static_partition_type> { -public: - typedef detail::proportional_split split_type; - static_partition_type( const static_partitioner& ) - : linear_affinity_mode<static_partition_type>() {} - static_partition_type( static_partition_type& p, const proportional_split& split_obj ) - : linear_affinity_mode<static_partition_type>(p, split_obj) {} -}; - -class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > { - static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units - slot_id* my_array; -public: - static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task - typedef detail::proportional_split split_type; - affinity_partition_type( affinity_partitioner_base& ap ) - : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() { - __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); - ap.resize(factor); - my_array = ap.my_array; - my_max_depth = factor_power + 1; - __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); - } - affinity_partition_type(affinity_partition_type& p, split) - : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split()) - , my_array(p.my_array) {} - affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj) - : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj) - , my_array(p.my_array) {} - void note_affinity(slot_id id) { - if( my_divisor ) - my_array[my_head] = id; - } - void spawn_task(task& t, task_group_context& ctx) { - if (my_divisor) { - if (!my_array[my_head]) { - // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse - spawn(t, ctx, slot_id(my_head / factor)); - } else { - spawn(t, ctx, my_array[my_head]); - } - } else { - spawn(t, ctx); - } - } -}; - -//! A simple partitioner -/** Divides the range until the range is not divisible. - @ingroup algorithms */ -class simple_partitioner { -public: - simple_partitioner() {} -private: - template<typename Range, typename Body, typename Partitioner> friend struct start_for; - template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_scan; - // new implementation just extends existing interface - typedef simple_partition_type task_partition_type; - // TODO: consider to make split_type public - typedef simple_partition_type::split_type split_type; - - // for parallel_scan only - class partition_type { - public: - bool should_execute_range(const execution_data& ) {return false;} - partition_type( const simple_partitioner& ) {} - partition_type( const partition_type&, split ) {} - }; -}; - -//! An auto partitioner -/** The range is initial divided into several large chunks. 
- Chunks are further subdivided into smaller pieces if demand detected and they are divisible. - @ingroup algorithms */ -class auto_partitioner { -public: - auto_partitioner() {} - -private: - template<typename Range, typename Body, typename Partitioner> friend struct start_for; - template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_scan; - // new implementation just extends existing interface - typedef auto_partition_type task_partition_type; - // TODO: consider to make split_type public - typedef auto_partition_type::split_type split_type; - - //! Backward-compatible partition for auto and affinity partition objects. - class partition_type { - size_t num_chunks; - static const size_t VICTIM_CHUNKS = 4; - public: - bool should_execute_range(const execution_data& ed) { - if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) ) - num_chunks = VICTIM_CHUNKS; - return num_chunks==1; - } - partition_type( const auto_partitioner& ) - : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} - partition_type( partition_type& pt, split ) { - num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u; - } - }; -}; - -//! A static partitioner -class static_partitioner { -public: - static_partitioner() {} -private: - template<typename Range, typename Body, typename Partitioner> friend struct start_for; - template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_scan; - // new implementation just extends existing interface - typedef static_partition_type task_partition_type; - // TODO: consider to make split_type public - typedef static_partition_type::split_type split_type; -}; - -//! 
An affinity partitioner -class affinity_partitioner : affinity_partitioner_base { -public: - affinity_partitioner() {} - -private: - template<typename Range, typename Body, typename Partitioner> friend struct start_for; - template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_scan; - // new implementation just extends existing interface - typedef affinity_partition_type task_partition_type; - // TODO: consider to make split_type public - typedef affinity_partition_type::split_type split_type; -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -// Partitioners -using detail::d1::auto_partitioner; -using detail::d1::simple_partitioner; -using detail::d1::static_partitioner; -using detail::d1::affinity_partitioner; -// Split types -using detail::split; -using detail::proportional_split; -} // namespace v1 - -} // namespace tbb - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4244 is back - -#undef __TBB_INITIAL_CHUNKS -#undef __TBB_RANGE_POOL_CAPACITY -#undef __TBB_INIT_DEPTH - -#endif /* __TBB_partitioner_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_partitioner_H +#define __TBB_partitioner_H + +#ifndef __TBB_INITIAL_CHUNKS +// initial task divisions per thread +#define __TBB_INITIAL_CHUNKS 2 +#endif +#ifndef __TBB_RANGE_POOL_CAPACITY +// maximum number of elements in range pool +#define __TBB_RANGE_POOL_CAPACITY 8 +#endif +#ifndef __TBB_INIT_DEPTH +// initial value for depth of range pool +#define __TBB_INIT_DEPTH 5 +#endif +#ifndef __TBB_DEMAND_DEPTH_ADD +// when imbalance is found range splits this value times more +#define __TBB_DEMAND_DEPTH_ADD 1 +#endif + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_aligned_space.h" +#include "detail/_utils.h" +#include "detail/_template_helpers.h" +#include "detail/_range_common.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" + +#include "cache_aligned_allocator.h" +#include "task_group.h" // task_group_context +#include "task_arena.h" + +#include <algorithm> +#include <atomic> +#include <type_traits> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Workaround for overzealous compiler warnings + #pragma warning (push) + #pragma warning (disable: 4244) +#endif + +namespace tbb { +namespace detail { + +namespace d1 { +class auto_partitioner; +class simple_partitioner; +class static_partitioner; +class affinity_partitioner; +class affinity_partition_type; +class affinity_partitioner_base; + +inline std::size_t get_initial_auto_partitioner_divisor() { + const std::size_t factor = 4; + return factor * max_concurrency(); +} + +//! Defines entry point for affinity partitioner into oneTBB run-time library. 
+class affinity_partitioner_base: no_copy { + friend class affinity_partitioner; + friend class affinity_partition_type; + //! Array that remembers affinities of tree positions to affinity_id. + /** NULL if my_size==0. */ + slot_id* my_array; + //! Number of elements in my_array. + std::size_t my_size; + //! Zeros the fields. + affinity_partitioner_base() : my_array(nullptr), my_size(0) {} + //! Deallocates my_array. + ~affinity_partitioner_base() { resize(0); } + //! Resize my_array. + /** Retains values if resulting size is the same. */ + void resize(unsigned factor) { + // Check factor to avoid asking for number of workers while there might be no arena. + unsigned max_threads_in_arena = max_concurrency(); + std::size_t new_size = factor ? factor * max_threads_in_arena : 0; + if (new_size != my_size) { + if (my_array) { + r1::cache_aligned_deallocate(my_array); + // Following two assignments must be done here for sake of exception safety. + my_array = nullptr; + my_size = 0; + } + if (new_size) { + my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id))); + std::fill_n(my_array, new_size, no_slot); + my_size = new_size; + } + } + } +}; + +template<typename Range, typename Body, typename Partitioner> struct start_for; +template<typename Range, typename Body, typename Partitioner> struct start_scan; +template<typename Range, typename Body, typename Partitioner> struct start_reduce; +template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce; + +struct node { + node* my_parent{}; + std::atomic<int> m_ref_count{}; + + node() = default; + node(node* parent, int ref_count) : + my_parent{parent}, m_ref_count{ref_count} { + __TBB_ASSERT(ref_count > 0, "The ref count must be positive"); + } +}; + +struct wait_node : node { + wait_node() : node{ nullptr, 1 } {} + wait_context m_wait{1}; +}; + +//! 
Join task node that contains shared flag for stealing feedback +struct tree_node : public node { + small_object_allocator m_allocator; + std::atomic<bool> m_child_stolen{false}; + + tree_node(node* parent, int ref_count, small_object_allocator& alloc) + : node{parent, ref_count} + , m_allocator{alloc} {} + + void join(task_group_context*) {/*dummy, required only for reduction algorithms*/}; + + template <typename Task> + static void mark_task_stolen(Task &t) { + std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen; +#if TBB_USE_PROFILING_TOOLS + // Threading tools respect lock prefix but report false-positive data-race via plain store + flag.exchange(true); +#else + flag.store(true, std::memory_order_relaxed); +#endif // TBB_USE_PROFILING_TOOLS + } + template <typename Task> + static bool is_peer_stolen(Task &t) { + return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed); + } +}; + +// Context used to check cancellation state during reduction join process +template<typename TreeNodeType> +void fold_tree(node* n, const execution_data& ed) { + for (;;) { + __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive."); + call_itt_task_notify(releasing, n); + if (--n->m_ref_count > 0) { + return; + } + node* parent = n->my_parent; + if (!parent) { + break; + }; + + call_itt_task_notify(acquired, n); + TreeNodeType* self = static_cast<TreeNodeType*>(n); + self->join(ed.context); + self->m_allocator.delete_object(self, ed); + n = parent; + } + // Finish parallel for execution when the root (last node) is reached + static_cast<wait_node*>(n)->m_wait.release(); +} + +//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows +//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented +//! by a number that cannot fit into machine word. +typedef unsigned char depth_t; + +//! Range pool stores ranges of type T in a circular buffer with MaxCapacity +template <typename T, depth_t MaxCapacity> +class range_vector { + depth_t my_head; + depth_t my_tail; + depth_t my_size; + depth_t my_depth[MaxCapacity]; // relative depths of stored ranges + tbb::detail::aligned_space<T, MaxCapacity> my_pool; + +public: + //! initialize via first range in pool + range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) { + my_depth[0] = 0; + new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move? + } + ~range_vector() { + while( !empty() ) pop_back(); + } + bool empty() const { return my_size == 0; } + depth_t size() const { return my_size; } + //! Populates range pool via ranges up to max depth or while divisible + //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces + void split_to_fill(depth_t max_depth) { + while( my_size < MaxCapacity && is_divisible(max_depth) ) { + depth_t prev = my_head; + my_head = (my_head + 1) % MaxCapacity; + new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move? 
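fold_tree above is the join side of these algorithms: each finishing child drops one reference on its parent node, and only the last child to finish continues up the tree, with the root finally releasing the wait_context. A minimal standalone sketch of that reference-counting pattern, using hypothetical demo_node/demo_fold names rather than the library's types:

    #include <atomic>

    struct demo_node {
        demo_node* parent{nullptr};
        std::atomic<int> ref_count{0};   // one reference per outstanding child
    };

    // Called by each finishing child: only the thread that drops the last
    // reference walks up to the parent; the root signals overall completion.
    inline void demo_fold(demo_node* n, std::atomic<bool>& done) {
        for (;;) {
            if (n->ref_count.fetch_sub(1, std::memory_order_acq_rel) > 1)
                return;                  // siblings are still running
            demo_node* parent = n->parent;
            if (!parent) break;          // reached the root
            n = parent;                  // per-level join work would happen here
        }
        done.store(true, std::memory_order_release);
    }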
+ my_pool.begin()[prev].~T(); // instead of assignment + new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split + my_depth[my_head] = ++my_depth[prev]; + my_size++; + } + } + void pop_back() { + __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size"); + my_pool.begin()[my_head].~T(); + my_size--; + my_head = (my_head + MaxCapacity - 1) % MaxCapacity; + } + void pop_front() { + __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size"); + my_pool.begin()[my_tail].~T(); + my_size--; + my_tail = (my_tail + 1) % MaxCapacity; + } + T& back() { + __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size"); + return my_pool.begin()[my_head]; + } + T& front() { + __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size"); + return my_pool.begin()[my_tail]; + } + //! similarly to front(), returns depth of the first range in the pool + depth_t front_depth() { + __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size"); + return my_depth[my_tail]; + } + depth_t back_depth() { + __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size"); + return my_depth[my_head]; + } + bool is_divisible(depth_t max_depth) { + return back_depth() < max_depth && back().is_divisible(); + } +}; + +//! Provides default methods for partition objects and common algorithm blocks. +template <typename Partition> +struct partition_type_base { + typedef detail::split split_type; + // decision makers + void note_affinity( slot_id ) {} + template <typename Task> + bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range() + template <typename Range> split_type get_split() { return split(); } + Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper + + template<typename StartType, typename Range> + void work_balance(StartType &start, Range &range, const execution_data&) { + start.run_body( range ); // simple partitioner goes always here + } + + template<typename StartType, typename Range> + void execute(StartType &start, Range &range, execution_data& ed) { + // The algorithm in a few words ([]-denotes calls to decision methods of partitioner): + // [If this task is stolen, adjust depth and divisions if necessary, set flag]. + // If range is divisible { + // Spread the work while [initial divisions left]; + // Create trap task [if necessary]; + // } + // If not divisible or [max depth is reached], execute, else do the range pool part + if ( range.is_divisible() ) { + if ( self().is_divisible() ) { + do { // split until is divisible + typename Partition::split_type split_obj = self().template get_split<Range>(); + start.offer_work( split_obj, ed ); + } while ( range.is_divisible() && self().is_divisible() ); + } + } + self().work_balance(start, range, ed); + } +}; + +//! Provides default splitting strategy for partition objects. +template <typename Partition> +struct adaptive_mode : partition_type_base<Partition> { + typedef Partition my_partition; + std::size_t my_divisor; + // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves. + // A task which has only one index must produce the right split without reserved index in order to avoid + // it to be overwritten in note_affinity() of the created (right) task. + // I.e. 
a task created deeper than the affinity array can remember must not save its affinity (LIFO order) + static const unsigned factor = 1; + adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {} + adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {} + /*! Override do_split methods in order to specify splitting strategy */ + std::size_t do_split(adaptive_mode &src, split) { + return src.my_divisor /= 2u; + } +}; + +//! Helper type for checking availability of proportional_split constructor +template <typename T> using supports_proportional_splitting = typename std::is_constructible<T, T&, proportional_split&>; + +//! A helper class to create a proportional_split object for a given type of Range. +/** If the Range has proportional_split constructor, + then created object splits a provided value in an implemenation-defined proportion; + otherwise it represents equal-size split. */ +// TODO: check if this helper can be a nested class of proportional_mode. +template <typename Range, typename = void> +struct proportion_helper { + static proportional_split get_split(std::size_t) { return proportional_split(1,1); } +}; + +template <typename Range> +struct proportion_helper<Range, typename std::enable_if<supports_proportional_splitting<Range>::value>::type> { + static proportional_split get_split(std::size_t n) { + std::size_t right = n / 2; + std::size_t left = n - right; + return proportional_split(left, right); + } +}; + +//! Provides proportional splitting strategy for partition objects +template <typename Partition> +struct proportional_mode : adaptive_mode<Partition> { + typedef Partition my_partition; + using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes + + proportional_mode() : adaptive_mode<Partition>() {} + proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {} + proportional_mode(proportional_mode &src, const proportional_split& split_obj) { self().my_divisor = do_split(src, split_obj); } + std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) { + std::size_t portion = split_obj.right() * my_partition::factor; + portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor); + src.my_divisor -= portion; + return portion; + } + bool is_divisible() { // part of old should_execute_range() + return self().my_divisor > my_partition::factor; + } + template <typename Range> + proportional_split get_split() { + // Create a proportion for the number of threads expected to handle "this" subrange + return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor ); + } +}; + +static std::size_t get_initial_partition_head() { + int current_index = tbb::this_task_arena::current_thread_index(); + if (current_index == tbb::task_arena::not_initialized) + current_index = 0; + return size_t(current_index); +} + +//! 
Provides default linear indexing of partitioner's sequence +template <typename Partition> +struct linear_affinity_mode : proportional_mode<Partition> { + std::size_t my_head; + std::size_t my_max_affinity; + using proportional_mode<Partition>::self; + linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()), + my_max_affinity(self().my_divisor) {} + linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split()) + , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} + linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj) + , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} + void spawn_task(task& t, task_group_context& ctx) { + if (self().my_divisor) { + spawn(t, ctx, slot_id(my_head)); + } else { + spawn(t, ctx); + } + } +}; + +static bool is_stolen_task(const execution_data& ed) { + return execution_slot(ed) != original_slot(ed); +} + +/*! Determine work-balance phase implementing splitting & stealing actions */ +template<class Mode> +struct dynamic_grainsize_mode : Mode { + using Mode::self; + enum { + begin = 0, + run, + pass + } my_delay; + depth_t my_max_depth; + static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; + dynamic_grainsize_mode(): Mode() + , my_delay(begin) + , my_max_depth(__TBB_INIT_DEPTH) {} + dynamic_grainsize_mode(dynamic_grainsize_mode& p, split) + : Mode(p, split()) + , my_delay(pass) + , my_max_depth(p.my_max_depth) {} + dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj) + : Mode(p, split_obj) + , my_delay(begin) + , my_max_depth(p.my_max_depth) {} + template <typename Task> + bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range() + if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree + self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)? + if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task +#if __TBB_USE_OPTIONAL_RTTI + // RTTI is available, check whether the cast is valid + // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast<tree_node*>(t.m_parent), 0); + // correctness of the cast relies on avoiding the root task for which: + // - initial value of my_divisor != 0 (protected by separate assertion) + // - is_stolen_task() always returns false for the root task. 
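linear_affinity_mode above gives each subrange a starting slot: an even split halves my_divisor and places the new part my_divisor slots further along, modulo my_max_affinity. A rough standalone model of that arithmetic (demo-only function, even binary splits, the factor multiplier and the proportional path ignored):

    #include <cstdio>
    #include <cstddef>

    // Prints the slot each leaf subrange would be spawned to, mimicking the
    // (head, divisor) halving performed on every split.
    void demo_affinity_split(std::size_t head, std::size_t divisor, std::size_t max_affinity) {
        if (divisor <= 1) {
            std::printf("leaf spawned to slot %zu\n", head);
            return;
        }
        std::size_t half = divisor / 2;
        demo_affinity_split(head, half, max_affinity);                         // left part keeps the head
        demo_affinity_split((head + half) % max_affinity, half, max_affinity); // right part is offset
    }

    // demo_affinity_split(0, 8, 8) prints each of the slots 0..7 exactly once.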
+#endif + tree_node::mark_task_stolen(t); + if( !my_max_depth ) my_max_depth++; + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } + } + return false; + } + depth_t max_depth() { return my_max_depth; } + void align_depth(depth_t base) { + __TBB_ASSERT(base <= my_max_depth, 0); + my_max_depth -= base; + } + template<typename StartType, typename Range> + void work_balance(StartType &start, Range &range, execution_data& ed) { + if( !range.is_divisible() || !self().max_depth() ) { + start.run_body( range ); // simple partitioner goes always here + } + else { // do range pool + range_vector<Range, range_pool_size> range_pool(range); + do { + range_pool.split_to_fill(self().max_depth()); // fill range pool + if( self().check_for_demand( start ) ) { + if( range_pool.size() > 1 ) { + start.offer_work( range_pool.front(), range_pool.front_depth(), ed ); + range_pool.pop_front(); + continue; + } + if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task + continue; // note: next split_to_fill() should split range at least once + } + start.run_body( range_pool.back() ); + range_pool.pop_back(); + } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() ); + } + } + template <typename Task> + bool check_for_demand(Task& t) { + if ( pass == my_delay ) { + if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array + return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more + else if ( self().my_divisor && my_max_depth ) { // make balancing task + self().my_divisor = 0; // once for each task; depth will be decreased in align_depth() + return true; + } + else if ( tree_node::is_peer_stolen(t) ) { + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } + } else if( begin == my_delay ) { + my_delay = pass; + } + return false; + } +}; + +class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > { +public: + auto_partition_type( const auto_partitioner& ) + : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() { + my_divisor *= __TBB_INITIAL_CHUNKS; + } + auto_partition_type( auto_partition_type& src, split) + : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {} + bool is_divisible() { // part of old should_execute_range() + if( my_divisor > 1 ) return true; + if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead + // keep same fragmentation while splitting for the local task pool + my_max_depth--; + my_divisor = 0; // decrease max_depth once per task + return true; + } else return false; + } + template <typename Task> + bool check_for_demand(Task& t) { + if (tree_node::is_peer_stolen(t)) { + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } else return false; + } + void spawn_task(task& t, task_group_context& ctx) { + spawn(t, ctx); + } +}; + +class simple_partition_type: public partition_type_base<simple_partition_type> { +public: + simple_partition_type( const simple_partitioner& ) {} + simple_partition_type( const simple_partition_type&, split ) {} + //! 
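Combining the constants used by auto_partition_type above: get_initial_auto_partitioner_divisor() is 4 * max_concurrency(), adaptive_mode divides that by 4 (factor is 1), and the constructor multiplies by __TBB_INITIAL_CHUNKS (2 by default), i.e. roughly two initial chunks per available thread. A sketch of the arithmetic, with the concurrency value assumed purely for illustration:

    #include <cstddef>

    // Assume max_concurrency() == 8 for illustration.
    constexpr std::size_t P = 8;
    constexpr std::size_t initial_divisor  = 4 * P;                   // get_initial_auto_partitioner_divisor()
    constexpr std::size_t adaptive_divisor = initial_divisor / 4 * 1; // adaptive_mode, factor == 1
    constexpr std::size_t auto_divisor     = adaptive_divisor * 2;    // * __TBB_INITIAL_CHUNKS
    static_assert(auto_divisor == 2 * P, "about two initial chunks per thread");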
simplified algorithm + template<typename StartType, typename Range> + void execute(StartType &start, Range &range, execution_data& ed) { + split_type split_obj = split(); // start.offer_work accepts split_type as reference + while( range.is_divisible() ) + start.offer_work( split_obj, ed ); + start.run_body( range ); + } + void spawn_task(task& t, task_group_context& ctx) { + spawn(t, ctx); + } +}; + +class static_partition_type : public linear_affinity_mode<static_partition_type> { +public: + typedef detail::proportional_split split_type; + static_partition_type( const static_partitioner& ) + : linear_affinity_mode<static_partition_type>() {} + static_partition_type( static_partition_type& p, const proportional_split& split_obj ) + : linear_affinity_mode<static_partition_type>(p, split_obj) {} +}; + +class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > { + static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units + slot_id* my_array; +public: + static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task + typedef detail::proportional_split split_type; + affinity_partition_type( affinity_partitioner_base& ap ) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() { + __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); + ap.resize(factor); + my_array = ap.my_array; + my_max_depth = factor_power + 1; + __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); + } + affinity_partition_type(affinity_partition_type& p, split) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split()) + , my_array(p.my_array) {} + affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj) + , my_array(p.my_array) {} + void note_affinity(slot_id id) { + if( my_divisor ) + my_array[my_head] = id; + } + void spawn_task(task& t, task_group_context& ctx) { + if (my_divisor) { + if (!my_array[my_head]) { + // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse + spawn(t, ctx, slot_id(my_head / factor)); + } else { + spawn(t, ctx, my_array[my_head]); + } + } else { + spawn(t, ctx); + } + } +}; + +//! A simple partitioner +/** Divides the range until the range is not divisible. + @ingroup algorithms */ +class simple_partitioner { +public: + simple_partitioner() {} +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef simple_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef simple_partition_type::split_type split_type; + + // for parallel_scan only + class partition_type { + public: + bool should_execute_range(const execution_data& ) {return false;} + partition_type( const simple_partitioner& ) {} + partition_type( const partition_type&, split ) {} + }; +}; + +//! An auto partitioner +/** The range is initial divided into several large chunks. 
+ Chunks are further subdivided into smaller pieces if demand detected and they are divisible. + @ingroup algorithms */ +class auto_partitioner { +public: + auto_partitioner() {} + +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef auto_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef auto_partition_type::split_type split_type; + + //! Backward-compatible partition for auto and affinity partition objects. + class partition_type { + size_t num_chunks; + static const size_t VICTIM_CHUNKS = 4; + public: + bool should_execute_range(const execution_data& ed) { + if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) ) + num_chunks = VICTIM_CHUNKS; + return num_chunks==1; + } + partition_type( const auto_partitioner& ) + : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} + partition_type( partition_type& pt, split ) { + num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u; + } + }; +}; + +//! A static partitioner +class static_partitioner { +public: + static_partitioner() {} +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef static_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef static_partition_type::split_type split_type; +}; + +//! 
An affinity partitioner +class affinity_partitioner : affinity_partitioner_base { +public: + affinity_partitioner() {} + +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef affinity_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef affinity_partition_type::split_type split_type; +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +// Partitioners +using detail::d1::auto_partitioner; +using detail::d1::simple_partitioner; +using detail::d1::static_partitioner; +using detail::d1::affinity_partitioner; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (pop) +#endif // warning 4244 is back + +#undef __TBB_INITIAL_CHUNKS +#undef __TBB_RANGE_POOL_CAPACITY +#undef __TBB_INIT_DEPTH + +#endif /* __TBB_partitioner_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/profiling.h b/contrib/libs/tbb/include/oneapi/tbb/profiling.h index 4b62da2060..7a90d71f60 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/profiling.h +++ b/contrib/libs/tbb/include/oneapi/tbb/profiling.h @@ -1,243 +1,243 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_profiling_H -#define __TBB_profiling_H - -#include "detail/_config.h" -#include <cstdint> - -#include <string> - -namespace tbb { -namespace detail { -inline namespace d0 { - // include list of index names - #define TBB_STRING_RESOURCE(index_name,str) index_name, - enum string_resource_index : std::uintptr_t { - #include "detail/_string_resource.h" - NUM_STRINGS - }; - #undef TBB_STRING_RESOURCE - - enum itt_relation - { - __itt_relation_is_unknown = 0, - __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ - __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ - __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ - __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ - __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ - __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ - __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ - }; - -//! Unicode support -#if (_WIN32||_WIN64) && !__MINGW32__ - //! 
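The four partitioner classes in this header are passed as the optional last argument of the parallel algorithms; a typical call site (array and bounds are placeholders) looks like:

    #include <cstddef>
    #include "oneapi/tbb/parallel_for.h"
    #include "oneapi/tbb/blocked_range.h"
    #include "oneapi/tbb/partitioner.h"

    void scale(float* a, std::size_t n) {
        tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n),
            [=](const tbb::blocked_range<std::size_t>& r) {
                for (std::size_t i = r.begin(); i != r.end(); ++i)
                    a[i] *= 2.0f;
            },
            tbb::auto_partitioner{});      // also the behavior when no partitioner is given

        // affinity_partitioner is stateful: keep one object alive and reuse it
        // across calls over the same data so the recorded affinities pay off.
        static tbb::affinity_partitioner ap;
        tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n),
            [=](const tbb::blocked_range<std::size_t>& r) {
                for (std::size_t i = r.begin(); i != r.end(); ++i)
                    a[i] += 1.0f;
            },
            ap);
    }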
Unicode character type. Always wchar_t on Windows. - using tchar = wchar_t; -#else /* !WIN */ - using tchar = char; -#endif /* !WIN */ - -} // namespace d0 -} // namespace detail -} // namespace tbb - -#include <atomic> -#if _WIN32||_WIN64 -#include <stdlib.h> /* mbstowcs_s */ -#endif -// Need these to work regardless of tools support -namespace tbb { -namespace detail { -namespace d1 { - enum notify_type {prepare=0, cancel, acquired, releasing, destroy}; - enum itt_domain_enum { ITT_DOMAIN_FLOW=0, ITT_DOMAIN_MAIN=1, ITT_DOMAIN_ALGO=2, ITT_NUM_DOMAINS }; -} // namespace d1 - -namespace r1 { - void __TBB_EXPORTED_FUNC call_itt_notify(int t, void* ptr); - void __TBB_EXPORTED_FUNC create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname); - void __TBB_EXPORTED_FUNC itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, - void* parent, unsigned long long parent_extra, string_resource_index name_index); - void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, - void* parent, unsigned long long parent_extra, string_resource_index name_index); - void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain); - void __TBB_EXPORTED_FUNC itt_set_sync_name(void* obj, const tchar* name); - void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, - string_resource_index key, const char* value); - void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, - string_resource_index key, void* value); - void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void* addr0, unsigned long long addr0_extra, - itt_relation relation, void* addr1, unsigned long long addr1_extra); - void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void* region, unsigned long long region_extra, - void* parent, unsigned long long parent_extra, string_resource_index /* name_index */); - void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void* region, unsigned long long region_extra); -} // namespace r1 - -namespace d1 { -#if TBB_USE_PROFILING_TOOLS && (_WIN32||_WIN64) && !__MINGW32__ - inline std::size_t multibyte_to_widechar(wchar_t* wcs, const char* mbs, std::size_t bufsize) { - std::size_t len; - mbstowcs_s(&len, wcs, bufsize, mbs, _TRUNCATE); - return len; // mbstowcs_s counts null terminator - } -#endif - -#if TBB_USE_PROFILING_TOOLS - inline void create_itt_sync(void *ptr, const char *objtype, const char *objname) { -#if (_WIN32||_WIN64) && !__MINGW32__ - std::size_t len_type = multibyte_to_widechar(nullptr, objtype, 0); - wchar_t *type = new wchar_t[len_type]; - multibyte_to_widechar(type, objtype, len_type); - std::size_t len_name = multibyte_to_widechar(nullptr, objname, 0); - wchar_t *name = new wchar_t[len_name]; - multibyte_to_widechar(name, objname, len_name); -#else // WIN - const char *type = objtype; - const char *name = objname; -#endif - r1::create_itt_sync(ptr, type, name); - -#if (_WIN32||_WIN64) && !__MINGW32__ - delete[] type; - delete[] name; -#endif // WIN - } - -// Distinguish notifications on task for reducing overheads -#if TBB_USE_PROFILING_TOOLS == 2 - inline void call_itt_task_notify(d1::notify_type t, void *ptr) { - r1::call_itt_notify((int)t, ptr); - } -#else - inline void call_itt_task_notify(d1::notify_type, void *) {} -#endif // TBB_USE_PROFILING_TOOLS - - inline void call_itt_notify(d1::notify_type t, void *ptr) { - 
r1::call_itt_notify((int)t, ptr); - } - -#if (_WIN32||_WIN64) && !__MINGW32__ - inline void itt_set_sync_name(void* obj, const wchar_t* name) { - r1::itt_set_sync_name(obj, name); - } - inline void itt_set_sync_name(void* obj, const char* name) { - std::size_t len_name = multibyte_to_widechar(nullptr, name, 0); - wchar_t *obj_name = new wchar_t[len_name]; - multibyte_to_widechar(obj_name, name, len_name); - r1::itt_set_sync_name(obj, obj_name); - delete[] obj_name; - } -#else - inline void itt_set_sync_name( void* obj, const char* name) { - r1::itt_set_sync_name(obj, name); - } -#endif //WIN - - inline void itt_make_task_group(itt_domain_enum domain, void* group, unsigned long long group_extra, - void* parent, unsigned long long parent_extra, string_resource_index name_index) { - r1::itt_make_task_group(domain, group, group_extra, parent, parent_extra, name_index); - } - - inline void itt_metadata_str_add( itt_domain_enum domain, void *addr, unsigned long long addr_extra, - string_resource_index key, const char *value ) { - r1::itt_metadata_str_add( domain, addr, addr_extra, key, value ); - } - - inline void register_node_addr(itt_domain_enum domain, void *addr, unsigned long long addr_extra, - string_resource_index key, void *value) { - r1::itt_metadata_ptr_add(domain, addr, addr_extra, key, value); - } - - inline void itt_relation_add( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, - itt_relation relation, void *addr1, unsigned long long addr1_extra ) { - r1::itt_relation_add( domain, addr0, addr0_extra, relation, addr1, addr1_extra ); - } - - inline void itt_task_begin( itt_domain_enum domain, void *task, unsigned long long task_extra, - void *parent, unsigned long long parent_extra, string_resource_index name_index ) { - r1::itt_task_begin( domain, task, task_extra, parent, parent_extra, name_index ); - } - - inline void itt_task_end( itt_domain_enum domain ) { - r1::itt_task_end( domain ); - } - - inline void itt_region_begin( itt_domain_enum domain, void *region, unsigned long long region_extra, - void *parent, unsigned long long parent_extra, string_resource_index name_index ) { - r1::itt_region_begin( domain, region, region_extra, parent, parent_extra, name_index ); - } - - inline void itt_region_end( itt_domain_enum domain, void *region, unsigned long long region_extra ) { - r1::itt_region_end( domain, region, region_extra ); - } -#else - inline void create_itt_sync(void* /*ptr*/, const char* /*objtype*/, const char* /*objname*/) {} - - inline void call_itt_notify(notify_type /*t*/, void* /*ptr*/) {} - - inline void call_itt_task_notify(notify_type /*t*/, void* /*ptr*/) {} -#endif // TBB_USE_PROFILING_TOOLS - -#if TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) -class event { -/** This class supports user event traces through itt. - Common use-case is tagging data flow graph tasks (data-id) - and visualization by Intel Advisor Flow Graph Analyzer (FGA) **/ -// TODO: Replace implementation by itt user event api. 
- - const std::string my_name; - - static void emit_trace(const std::string &input) { - itt_metadata_str_add( ITT_DOMAIN_FLOW, NULL, FLOW_NULL, USER_EVENT, ( "FGA::DATAID::" + input ).c_str() ); - } - -public: - event(const std::string &input) - : my_name( input ) - { } - - void emit() { - emit_trace(my_name); - } - - static void emit(const std::string &description) { - emit_trace(description); - } - -}; -#else // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) -// Using empty struct if user event tracing is disabled: -struct event { - event(const std::string &) { } - - void emit() { } - - static void emit(const std::string &) { } -}; -#endif // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) -} // namespace d1 -} // namespace detail - -namespace profiling { - using detail::d1::event; -} -} // namespace tbb - - -#endif /* __TBB_profiling_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_profiling_H +#define __TBB_profiling_H + +#include "detail/_config.h" +#include <cstdint> + +#include <string> + +namespace tbb { +namespace detail { +inline namespace d0 { + // include list of index names + #define TBB_STRING_RESOURCE(index_name,str) index_name, + enum string_resource_index : std::uintptr_t { + #include "detail/_string_resource.h" + NUM_STRINGS + }; + #undef TBB_STRING_RESOURCE + + enum itt_relation + { + __itt_relation_is_unknown = 0, + __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ + __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ + __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ + __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ + __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ + }; + +//! Unicode support +#if (_WIN32||_WIN64) && !__MINGW32__ + //! Unicode character type. Always wchar_t on Windows. 
+ using tchar = wchar_t; +#else /* !WIN */ + using tchar = char; +#endif /* !WIN */ + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#include <atomic> +#if _WIN32||_WIN64 +#include <stdlib.h> /* mbstowcs_s */ +#endif +// Need these to work regardless of tools support +namespace tbb { +namespace detail { +namespace d1 { + enum notify_type {prepare=0, cancel, acquired, releasing, destroy}; + enum itt_domain_enum { ITT_DOMAIN_FLOW=0, ITT_DOMAIN_MAIN=1, ITT_DOMAIN_ALGO=2, ITT_NUM_DOMAINS }; +} // namespace d1 + +namespace r1 { + void __TBB_EXPORTED_FUNC call_itt_notify(int t, void* ptr); + void __TBB_EXPORTED_FUNC create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname); + void __TBB_EXPORTED_FUNC itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index); + void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index); + void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain); + void __TBB_EXPORTED_FUNC itt_set_sync_name(void* obj, const tchar* name); + void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, + string_resource_index key, const char* value); + void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, + string_resource_index key, void* value); + void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void* addr0, unsigned long long addr0_extra, + itt_relation relation, void* addr1, unsigned long long addr1_extra); + void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void* region, unsigned long long region_extra, + void* parent, unsigned long long parent_extra, string_resource_index /* name_index */); + void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void* region, unsigned long long region_extra); +} // namespace r1 + +namespace d1 { +#if TBB_USE_PROFILING_TOOLS && (_WIN32||_WIN64) && !__MINGW32__ + inline std::size_t multibyte_to_widechar(wchar_t* wcs, const char* mbs, std::size_t bufsize) { + std::size_t len; + mbstowcs_s(&len, wcs, bufsize, mbs, _TRUNCATE); + return len; // mbstowcs_s counts null terminator + } +#endif + +#if TBB_USE_PROFILING_TOOLS + inline void create_itt_sync(void *ptr, const char *objtype, const char *objname) { +#if (_WIN32||_WIN64) && !__MINGW32__ + std::size_t len_type = multibyte_to_widechar(nullptr, objtype, 0); + wchar_t *type = new wchar_t[len_type]; + multibyte_to_widechar(type, objtype, len_type); + std::size_t len_name = multibyte_to_widechar(nullptr, objname, 0); + wchar_t *name = new wchar_t[len_name]; + multibyte_to_widechar(name, objname, len_name); +#else // WIN + const char *type = objtype; + const char *name = objname; +#endif + r1::create_itt_sync(ptr, type, name); + +#if (_WIN32||_WIN64) && !__MINGW32__ + delete[] type; + delete[] name; +#endif // WIN + } + +// Distinguish notifications on task for reducing overheads +#if TBB_USE_PROFILING_TOOLS == 2 + inline void call_itt_task_notify(d1::notify_type t, void *ptr) { + r1::call_itt_notify((int)t, ptr); + } +#else + inline void call_itt_task_notify(d1::notify_type, void *) {} +#endif // TBB_USE_PROFILING_TOOLS + + inline void call_itt_notify(d1::notify_type t, void *ptr) { + r1::call_itt_notify((int)t, ptr); + } + +#if 
(_WIN32||_WIN64) && !__MINGW32__ + inline void itt_set_sync_name(void* obj, const wchar_t* name) { + r1::itt_set_sync_name(obj, name); + } + inline void itt_set_sync_name(void* obj, const char* name) { + std::size_t len_name = multibyte_to_widechar(nullptr, name, 0); + wchar_t *obj_name = new wchar_t[len_name]; + multibyte_to_widechar(obj_name, name, len_name); + r1::itt_set_sync_name(obj, obj_name); + delete[] obj_name; + } +#else + inline void itt_set_sync_name( void* obj, const char* name) { + r1::itt_set_sync_name(obj, name); + } +#endif //WIN + + inline void itt_make_task_group(itt_domain_enum domain, void* group, unsigned long long group_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index) { + r1::itt_make_task_group(domain, group, group_extra, parent, parent_extra, name_index); + } + + inline void itt_metadata_str_add( itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, const char *value ) { + r1::itt_metadata_str_add( domain, addr, addr_extra, key, value ); + } + + inline void register_node_addr(itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, void *value) { + r1::itt_metadata_ptr_add(domain, addr, addr_extra, key, value); + } + + inline void itt_relation_add( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, + itt_relation relation, void *addr1, unsigned long long addr1_extra ) { + r1::itt_relation_add( domain, addr0, addr0_extra, relation, addr1, addr1_extra ); + } + + inline void itt_task_begin( itt_domain_enum domain, void *task, unsigned long long task_extra, + void *parent, unsigned long long parent_extra, string_resource_index name_index ) { + r1::itt_task_begin( domain, task, task_extra, parent, parent_extra, name_index ); + } + + inline void itt_task_end( itt_domain_enum domain ) { + r1::itt_task_end( domain ); + } + + inline void itt_region_begin( itt_domain_enum domain, void *region, unsigned long long region_extra, + void *parent, unsigned long long parent_extra, string_resource_index name_index ) { + r1::itt_region_begin( domain, region, region_extra, parent, parent_extra, name_index ); + } + + inline void itt_region_end( itt_domain_enum domain, void *region, unsigned long long region_extra ) { + r1::itt_region_end( domain, region, region_extra ); + } +#else + inline void create_itt_sync(void* /*ptr*/, const char* /*objtype*/, const char* /*objname*/) {} + + inline void call_itt_notify(notify_type /*t*/, void* /*ptr*/) {} + + inline void call_itt_task_notify(notify_type /*t*/, void* /*ptr*/) {} +#endif // TBB_USE_PROFILING_TOOLS + +#if TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +class event { +/** This class supports user event traces through itt. + Common use-case is tagging data flow graph tasks (data-id) + and visualization by Intel Advisor Flow Graph Analyzer (FGA) **/ +// TODO: Replace implementation by itt user event api. 
+ + const std::string my_name; + + static void emit_trace(const std::string &input) { + itt_metadata_str_add( ITT_DOMAIN_FLOW, NULL, FLOW_NULL, USER_EVENT, ( "FGA::DATAID::" + input ).c_str() ); + } + +public: + event(const std::string &input) + : my_name( input ) + { } + + void emit() { + emit_trace(my_name); + } + + static void emit(const std::string &description) { + emit_trace(description); + } + +}; +#else // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +// Using empty struct if user event tracing is disabled: +struct event { + event(const std::string &) { } + + void emit() { } + + static void emit(const std::string &) { } +}; +#endif // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +} // namespace d1 +} // namespace detail + +namespace profiling { + using detail::d1::event; +} +} // namespace tbb + + +#endif /* __TBB_profiling_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h index 6c3f1fe1e9..00c7443f9a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h @@ -1,197 +1,197 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_queuing_mutex_H -#define __TBB_queuing_mutex_H - -#include "detail/_namespace_injection.h" -#include "detail/_assert.h" -#include "detail/_utils.h" - -#include "profiling.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace d1 { - -//! Queuing mutex with local-only spinning. -/** @ingroup synchronization */ -class queuing_mutex { -public: - //! Construct unacquired mutex. - queuing_mutex() noexcept { - create_itt_sync(this, "tbb::queuing_mutex", ""); - }; - - queuing_mutex(const queuing_mutex&) = delete; - queuing_mutex& operator=(const queuing_mutex&) = delete; - - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock { - //! Reset fields to mean "no lock held". - void reset() { - m_mutex = nullptr; - } - - public: - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - scoped_lock() = default; - - //! Acquire lock on given mutex. - scoped_lock(queuing_mutex& m) { - acquire(m); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if (m_mutex) release(); - } - - //! No Copy - scoped_lock( const scoped_lock& ) = delete; - scoped_lock& operator=( const scoped_lock& ) = delete; - - //! Acquire lock on given mutex. - void acquire( queuing_mutex& m ) { - __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); - - // Must set all fields before the exchange, because once the - // exchange executes, *this becomes accessible to other threads. 
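The event helper above tags user data so tools such as Flow Graph Analyzer can correlate it; a minimal use, with a made-up tag string (it degrades to a no-op when user event tracing is disabled):

    #include <string>
    #include "oneapi/tbb/profiling.h"

    void tag_work(const std::string& data_id) {
        tbb::profiling::event e("DATASET::" + data_id);     // named trace object
        e.emit();                                           // emits FGA::DATAID::<name> metadata

        tbb::profiling::event::emit("one-off-" + data_id);  // static form, no object kept
    }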
- m_mutex = &m; - m_next.store(nullptr, std::memory_order_relaxed); - m_going.store(0U, std::memory_order_relaxed); - - // x86 compare exchange operation always has a strong fence - // "sending" the fields initialized above to other processors. - scoped_lock* pred = m.q_tail.exchange(this); - if (pred) { - call_itt_notify(prepare, &m); - __TBB_ASSERT(pred->m_next.load(std::memory_order_relaxed) == nullptr, "the predecessor has another successor!"); - - pred->m_next.store(this, std::memory_order_relaxed); - spin_wait_while_eq(m_going, 0U); - } - call_itt_notify(acquired, &m); - - // Force acquire so that user's critical section receives correct values - // from processor that was previously in the user's critical section. - atomic_fence(std::memory_order_acquire); - } - - //! Acquire lock on given mutex if free (i.e. non-blocking) - bool try_acquire( queuing_mutex& m ) { - __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); - - // Must set all fields before the compare_exchange_strong, because once the - // compare_exchange_strong executes, *this becomes accessible to other threads. - m_next.store(nullptr, std::memory_order_relaxed); - m_going.store(0U, std::memory_order_relaxed); - - scoped_lock* expected = nullptr; - // The compare_exchange_strong must have release semantics, because we are - // "sending" the fields initialized above to other processors. - // x86 compare exchange operation always has a strong fence - if (!m.q_tail.compare_exchange_strong(expected, this)) - return false; - - m_mutex = &m; - - // Force acquire so that user's critical section receives correct values - // from processor that was previously in the user's critical section. - atomic_fence(std::memory_order_acquire); - call_itt_notify(acquired, &m); - return true; - } - - //! Release lock. - void release() - { - __TBB_ASSERT(this->m_mutex, "no lock acquired"); - - call_itt_notify(releasing, this->m_mutex); - - if (m_next.load(std::memory_order_relaxed) == nullptr) { - scoped_lock* expected = this; - if (m_mutex->q_tail.compare_exchange_strong(expected, nullptr)) { - // this was the only item in the queue, and the queue is now empty. - reset(); - return; - } - // Someone in the queue - spin_wait_while_eq(m_next, nullptr); - } - m_next.load(std::memory_order_relaxed)->m_going.store(1U, std::memory_order_release); - - reset(); - } - - private: - //! The pointer to the mutex owned, or NULL if not holding a mutex. - queuing_mutex* m_mutex{nullptr}; - - //! The pointer to the next competitor for a mutex - std::atomic<scoped_lock*> m_next{nullptr}; - - //! The local spin-wait variable - /** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of - zero-initialization. Defining it as an entire word instead of - a byte seems to help performance slightly. */ - std::atomic<uintptr_t> m_going{0U}; - }; - - // Mutex traits - static constexpr bool is_rw_mutex = false; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = true; - -private: - //! 
The last competitor requesting the lock - std::atomic<scoped_lock*> q_tail{nullptr}; - -}; - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(queuing_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(queuing_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif //WIN -#else -inline void set_name(queuing_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(queuing_mutex&, const wchar_t*) {} -#endif //WIN -#endif -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::queuing_mutex; -} // namespace v1 -namespace profiling { - using detail::d1::set_name; -} -} // namespace tbb - -#endif /* __TBB_queuing_mutex_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_queuing_mutex_H +#define __TBB_queuing_mutex_H + +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include "profiling.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Queuing mutex with local-only spinning. +/** @ingroup synchronization */ +class queuing_mutex { +public: + //! Construct unacquired mutex. + queuing_mutex() noexcept { + create_itt_sync(this, "tbb::queuing_mutex", ""); + }; + + queuing_mutex(const queuing_mutex&) = delete; + queuing_mutex& operator=(const queuing_mutex&) = delete; + + //! The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + //! Reset fields to mean "no lock held". + void reset() { + m_mutex = nullptr; + } + + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + scoped_lock() = default; + + //! Acquire lock on given mutex. + scoped_lock(queuing_mutex& m) { + acquire(m); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if (m_mutex) release(); + } + + //! No Copy + scoped_lock( const scoped_lock& ) = delete; + scoped_lock& operator=( const scoped_lock& ) = delete; + + //! Acquire lock on given mutex. + void acquire( queuing_mutex& m ) { + __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); + + // Must set all fields before the exchange, because once the + // exchange executes, *this becomes accessible to other threads. + m_mutex = &m; + m_next.store(nullptr, std::memory_order_relaxed); + m_going.store(0U, std::memory_order_relaxed); + + // x86 compare exchange operation always has a strong fence + // "sending" the fields initialized above to other processors. 
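The set_name hooks in this header give profiling tools a readable label for a mutex object; for example (the label is arbitrary):

    #include "oneapi/tbb/queuing_mutex.h"

    tbb::queuing_mutex cache_mutex;

    void name_for_tools() {
        // No-op unless TBB_USE_PROFILING_TOOLS is enabled at build time.
        tbb::profiling::set_name(cache_mutex, "cache_mutex");
    }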
+ scoped_lock* pred = m.q_tail.exchange(this); + if (pred) { + call_itt_notify(prepare, &m); + __TBB_ASSERT(pred->m_next.load(std::memory_order_relaxed) == nullptr, "the predecessor has another successor!"); + + pred->m_next.store(this, std::memory_order_relaxed); + spin_wait_while_eq(m_going, 0U); + } + call_itt_notify(acquired, &m); + + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + } + + //! Acquire lock on given mutex if free (i.e. non-blocking) + bool try_acquire( queuing_mutex& m ) { + __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); + + // Must set all fields before the compare_exchange_strong, because once the + // compare_exchange_strong executes, *this becomes accessible to other threads. + m_next.store(nullptr, std::memory_order_relaxed); + m_going.store(0U, std::memory_order_relaxed); + + scoped_lock* expected = nullptr; + // The compare_exchange_strong must have release semantics, because we are + // "sending" the fields initialized above to other processors. + // x86 compare exchange operation always has a strong fence + if (!m.q_tail.compare_exchange_strong(expected, this)) + return false; + + m_mutex = &m; + + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + call_itt_notify(acquired, &m); + return true; + } + + //! Release lock. + void release() + { + __TBB_ASSERT(this->m_mutex, "no lock acquired"); + + call_itt_notify(releasing, this->m_mutex); + + if (m_next.load(std::memory_order_relaxed) == nullptr) { + scoped_lock* expected = this; + if (m_mutex->q_tail.compare_exchange_strong(expected, nullptr)) { + // this was the only item in the queue, and the queue is now empty. + reset(); + return; + } + // Someone in the queue + spin_wait_while_eq(m_next, nullptr); + } + m_next.load(std::memory_order_relaxed)->m_going.store(1U, std::memory_order_release); + + reset(); + } + + private: + //! The pointer to the mutex owned, or NULL if not holding a mutex. + queuing_mutex* m_mutex{nullptr}; + + //! The pointer to the next competitor for a mutex + std::atomic<scoped_lock*> m_next{nullptr}; + + //! The local spin-wait variable + /** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of + zero-initialization. Defining it as an entire word instead of + a byte seems to help performance slightly. */ + std::atomic<uintptr_t> m_going{0U}; + }; + + // Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = true; + +private: + //! 
The last competitor requesting the lock + std::atomic<scoped_lock*> q_tail{nullptr}; + +}; + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(queuing_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(queuing_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_mutex&, const wchar_t*) {} +#endif //WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::queuing_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#endif /* __TBB_queuing_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h index 6bb748f8a3..502e7997df 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h @@ -1,199 +1,199 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_queuing_rw_mutex_H -#define __TBB_queuing_rw_mutex_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_assert.h" - -#include "profiling.h" - -#include <cstring> -#include <atomic> - -namespace tbb { -namespace detail { -namespace r1 { -struct queuing_rw_mutex_impl; -} -namespace d1 { - -//! Queuing reader-writer mutex with local-only spinning. -/** Adapted from Krieger, Stumm, et al. pseudocode at - https://www.researchgate.net/publication/221083709_A_Fair_Fast_Scalable_Reader-Writer_Lock - @ingroup synchronization */ -class queuing_rw_mutex { - friend r1::queuing_rw_mutex_impl; -public: - //! Construct unacquired mutex. - queuing_rw_mutex() noexcept { - create_itt_sync(this, "tbb::queuing_rw_mutex", ""); - } - - //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL - ~queuing_rw_mutex() { - __TBB_ASSERT(q_tail.load(std::memory_order_relaxed) == nullptr, "destruction of an acquired mutex"); - } - - //! No Copy - queuing_rw_mutex(const queuing_rw_mutex&) = delete; - queuing_rw_mutex& operator=(const queuing_rw_mutex&) = delete; - - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock { - friend r1::queuing_rw_mutex_impl; - //! Initialize fields to mean "no lock held". - void initialize() { - my_mutex = nullptr; - my_internal_lock.store(0, std::memory_order_relaxed); - my_going.store(0, std::memory_order_relaxed); -#if TBB_USE_ASSERT - my_state = 0xFF; // Set to invalid state - my_next.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); - my_prev.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); -#endif /* TBB_USE_ASSERT */ - } - - public: - //! 
Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - scoped_lock() {initialize();} - - //! Acquire lock on given mutex. - scoped_lock( queuing_rw_mutex& m, bool write=true ) { - initialize(); - acquire(m,write); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if( my_mutex ) release(); - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - void acquire( queuing_rw_mutex& m, bool write=true ); - - //! Acquire lock on given mutex if free (i.e. non-blocking) - bool try_acquire( queuing_rw_mutex& m, bool write=true ); - - //! Release lock. - void release(); - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - bool upgrade_to_writer(); - - //! Downgrade writer to become a reader. - bool downgrade_to_reader(); - - private: - //! The pointer to the mutex owned, or NULL if not holding a mutex. - queuing_rw_mutex* my_mutex; - - //! The 'pointer' to the previous and next competitors for a mutex - std::atomic<uintptr_t> my_prev; - std::atomic<uintptr_t> my_next; - - using state_t = unsigned char ; - - //! State of the request: reader, writer, active reader, other service states - std::atomic<state_t> my_state; - - //! The local spin-wait variable - /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */ - std::atomic<unsigned char> my_going; - - //! A tiny internal lock - std::atomic<unsigned char> my_internal_lock; - }; - - // Mutex traits - static constexpr bool is_rw_mutex = true; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = true; - -private: - //! 
The last competitor requesting the lock - std::atomic<scoped_lock*> q_tail{nullptr}; -}; -#if TBB_USE_PROFILING_TOOLS -inline void set_name(queuing_rw_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(queuing_rw_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif //WIN -#else -inline void set_name(queuing_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(queuing_rw_mutex&, const wchar_t*) {} -#endif //WIN -#endif -} // namespace d1 - -namespace r1 { -void acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); -bool try_acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); -void release(d1::queuing_rw_mutex::scoped_lock&); -bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock&); -bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock&); -} // namespace r1 - -namespace d1 { - - -inline void queuing_rw_mutex::scoped_lock::acquire(queuing_rw_mutex& m,bool write) { - r1::acquire(m, *this, write); -} - -inline bool queuing_rw_mutex::scoped_lock::try_acquire(queuing_rw_mutex& m, bool write) { - return r1::try_acquire(m, *this, write); -} - -inline void queuing_rw_mutex::scoped_lock::release() { - r1::release(*this); -} - -inline bool queuing_rw_mutex::scoped_lock::upgrade_to_writer() { - return r1::upgrade_to_writer(*this); -} - -inline bool queuing_rw_mutex::scoped_lock::downgrade_to_reader() { - return r1::downgrade_to_reader(*this); -} -} // namespace d1 - -} // namespace detail - -inline namespace v1 { -using detail::d1::queuing_rw_mutex; -} // namespace v1 -namespace profiling { - using detail::d1::set_name; -} -} // namespace tbb - -#endif /* __TBB_queuing_rw_mutex_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_queuing_rw_mutex_H +#define __TBB_queuing_rw_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" + +#include "profiling.h" + +#include <cstring> +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { +struct queuing_rw_mutex_impl; +} +namespace d1 { + +//! Queuing reader-writer mutex with local-only spinning. +/** Adapted from Krieger, Stumm, et al. pseudocode at + https://www.researchgate.net/publication/221083709_A_Fair_Fast_Scalable_Reader-Writer_Lock + @ingroup synchronization */ +class queuing_rw_mutex { + friend r1::queuing_rw_mutex_impl; +public: + //! Construct unacquired mutex. + queuing_rw_mutex() noexcept { + create_itt_sync(this, "tbb::queuing_rw_mutex", ""); + } + + //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL + ~queuing_rw_mutex() { + __TBB_ASSERT(q_tail.load(std::memory_order_relaxed) == nullptr, "destruction of an acquired mutex"); + } + + //! No Copy + queuing_rw_mutex(const queuing_rw_mutex&) = delete; + queuing_rw_mutex& operator=(const queuing_rw_mutex&) = delete; + + //! 
The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + friend r1::queuing_rw_mutex_impl; + //! Initialize fields to mean "no lock held". + void initialize() { + my_mutex = nullptr; + my_internal_lock.store(0, std::memory_order_relaxed); + my_going.store(0, std::memory_order_relaxed); +#if TBB_USE_ASSERT + my_state = 0xFF; // Set to invalid state + my_next.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); + my_prev.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); +#endif /* TBB_USE_ASSERT */ + } + + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + scoped_lock() {initialize();} + + //! Acquire lock on given mutex. + scoped_lock( queuing_rw_mutex& m, bool write=true ) { + initialize(); + acquire(m,write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if( my_mutex ) release(); + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire( queuing_rw_mutex& m, bool write=true ); + + //! Acquire lock on given mutex if free (i.e. non-blocking) + bool try_acquire( queuing_rw_mutex& m, bool write=true ); + + //! Release lock. + void release(); + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade_to_writer(); + + //! Downgrade writer to become a reader. + bool downgrade_to_reader(); + + private: + //! The pointer to the mutex owned, or NULL if not holding a mutex. + queuing_rw_mutex* my_mutex; + + //! The 'pointer' to the previous and next competitors for a mutex + std::atomic<uintptr_t> my_prev; + std::atomic<uintptr_t> my_next; + + using state_t = unsigned char ; + + //! State of the request: reader, writer, active reader, other service states + std::atomic<state_t> my_state; + + //! The local spin-wait variable + /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */ + std::atomic<unsigned char> my_going; + + //! A tiny internal lock + std::atomic<unsigned char> my_internal_lock; + }; + + // Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = true; + +private: + //! 
The last competitor requesting the lock + std::atomic<scoped_lock*> q_tail{nullptr}; +}; +#if TBB_USE_PROFILING_TOOLS +inline void set_name(queuing_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(queuing_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_rw_mutex&, const wchar_t*) {} +#endif //WIN +#endif +} // namespace d1 + +namespace r1 { +void acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); +bool try_acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); +void release(d1::queuing_rw_mutex::scoped_lock&); +bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock&); +bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock&); +} // namespace r1 + +namespace d1 { + + +inline void queuing_rw_mutex::scoped_lock::acquire(queuing_rw_mutex& m,bool write) { + r1::acquire(m, *this, write); +} + +inline bool queuing_rw_mutex::scoped_lock::try_acquire(queuing_rw_mutex& m, bool write) { + return r1::try_acquire(m, *this, write); +} + +inline void queuing_rw_mutex::scoped_lock::release() { + r1::release(*this); +} + +inline bool queuing_rw_mutex::scoped_lock::upgrade_to_writer() { + return r1::upgrade_to_writer(*this); +} + +inline bool queuing_rw_mutex::scoped_lock::downgrade_to_reader() { + return r1::downgrade_to_reader(*this); +} +} // namespace d1 + +} // namespace detail + +inline namespace v1 { +using detail::d1::queuing_rw_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#endif /* __TBB_queuing_rw_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h index daab02f324..20addb3453 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h @@ -1,332 +1,332 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_scalable_allocator_H -#define __TBB_scalable_allocator_H - -#ifdef __cplusplus -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/detail/_utils.h" -#include <cstdlib> -#include <utility> -#else -#include <stddef.h> /* Need ptrdiff_t and size_t from here. */ -#if !_MSC_VER -#include <stdint.h> /* Need intptr_t from here. */ -#endif -#endif - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -#error #include <memory_resource> -#endif - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#if _MSC_VER - #define __TBB_EXPORTED_FUNC __cdecl -#else - #define __TBB_EXPORTED_FUNC -#endif - -/** The "malloc" analogue to allocate block of memory of size bytes. - * @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_malloc(size_t size); - -/** The "free" analogue to discard a previously allocated piece of memory. 
- @ingroup memory_allocation */ -void __TBB_EXPORTED_FUNC scalable_free(void* ptr); - -/** The "realloc" analogue complementing scalable_malloc. - @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_realloc(void* ptr, size_t size); - -/** The "calloc" analogue complementing scalable_malloc. - @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_calloc(size_t nobj, size_t size); - -/** The "posix_memalign" analogue. - @ingroup memory_allocation */ -int __TBB_EXPORTED_FUNC scalable_posix_memalign(void** memptr, size_t alignment, size_t size); - -/** The "_aligned_malloc" analogue. - @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_aligned_malloc(size_t size, size_t alignment); - -/** The "_aligned_realloc" analogue. - @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_aligned_realloc(void* ptr, size_t size, size_t alignment); - -/** The "_aligned_free" analogue. - @ingroup memory_allocation */ -void __TBB_EXPORTED_FUNC scalable_aligned_free(void* ptr); - -/** The analogue of _msize/malloc_size/malloc_usable_size. - Returns the usable size of a memory block previously allocated by scalable_*, - or 0 (zero) if ptr does not point to such a block. - @ingroup memory_allocation */ -size_t __TBB_EXPORTED_FUNC scalable_msize(void* ptr); - -/* Results for scalable_allocation_* functions */ -typedef enum { - TBBMALLOC_OK, - TBBMALLOC_INVALID_PARAM, - TBBMALLOC_UNSUPPORTED, - TBBMALLOC_NO_MEMORY, - TBBMALLOC_NO_EFFECT -} ScalableAllocationResult; - -/* Setting TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages. - scalable_allocation_mode call has priority over environment variable. */ -typedef enum { - TBBMALLOC_USE_HUGE_PAGES, /* value turns using huge pages on and off */ - /* deprecated, kept for backward compatibility only */ - USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES, - /* try to limit memory consumption value (Bytes), clean internal buffers - if limit is exceeded, but not prevents from requesting memory from OS */ - TBBMALLOC_SET_SOFT_HEAP_LIMIT, - /* Lower bound for the size (Bytes), that is interpreted as huge - * and not released during regular cleanup operations. */ - TBBMALLOC_SET_HUGE_SIZE_THRESHOLD -} AllocationModeParam; - -/** Set TBB allocator-specific allocation modes. - @ingroup memory_allocation */ -int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value); - -typedef enum { - /* Clean internal allocator buffers for all threads. - Returns TBBMALLOC_NO_EFFECT if no buffers cleaned, - TBBMALLOC_OK if some memory released from buffers. */ - TBBMALLOC_CLEAN_ALL_BUFFERS, - /* Clean internal allocator buffer for current thread only. - Return values same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */ - TBBMALLOC_CLEAN_THREAD_BUFFERS -} ScalableAllocationCmd; - -/** Call TBB allocator-specific commands. - @ingroup memory_allocation */ -int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param); - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ - -#ifdef __cplusplus - -//! The namespace rml contains components of low-level memory pool interface. -namespace rml { -class MemoryPool; - -typedef void *(*rawAllocType)(std::intptr_t pool_id, std::size_t &bytes); -// returns non-zero in case of error -typedef int (*rawFreeType)(std::intptr_t pool_id, void* raw_ptr, std::size_t raw_bytes); - -struct MemPoolPolicy { - enum { - TBBMALLOC_POOL_VERSION = 1 - }; - - rawAllocType pAlloc; - rawFreeType pFree; - // granularity of pAlloc allocations. 0 means default used. 
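    // (raw memory regions requested through pAlloc are presumably sized in multiples of this value)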
- std::size_t granularity; - int version; - // all memory consumed at 1st pAlloc call and never returned, - // no more pAlloc calls after 1st - unsigned fixedPool : 1, - // memory consumed but returned only at pool termination - keepAllMemory : 1, - reserved : 30; - - MemPoolPolicy(rawAllocType pAlloc_, rawFreeType pFree_, - std::size_t granularity_ = 0, bool fixedPool_ = false, - bool keepAllMemory_ = false) : - pAlloc(pAlloc_), pFree(pFree_), granularity(granularity_), version(TBBMALLOC_POOL_VERSION), - fixedPool(fixedPool_), keepAllMemory(keepAllMemory_), - reserved(0) {} -}; - -// enums have same values as appropriate enums from ScalableAllocationResult -// TODO: use ScalableAllocationResult in pool_create directly -enum MemPoolError { - // pool created successfully - POOL_OK = TBBMALLOC_OK, - // invalid policy parameters found - INVALID_POLICY = TBBMALLOC_INVALID_PARAM, - // requested pool policy is not supported by allocator library - UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED, - // lack of memory during pool creation - NO_MEMORY = TBBMALLOC_NO_MEMORY, - // action takes no effect - NO_EFFECT = TBBMALLOC_NO_EFFECT -}; - -MemPoolError pool_create_v1(std::intptr_t pool_id, const MemPoolPolicy *policy, - rml::MemoryPool **pool); - -bool pool_destroy(MemoryPool* memPool); -void *pool_malloc(MemoryPool* memPool, std::size_t size); -void *pool_realloc(MemoryPool* memPool, void *object, std::size_t size); -void *pool_aligned_malloc(MemoryPool* mPool, std::size_t size, std::size_t alignment); -void *pool_aligned_realloc(MemoryPool* mPool, void *ptr, std::size_t size, std::size_t alignment); -bool pool_reset(MemoryPool* memPool); -bool pool_free(MemoryPool *memPool, void *object); -MemoryPool *pool_identify(void *object); -std::size_t pool_msize(MemoryPool *memPool, void *object); - -} // namespace rml - -namespace tbb { -namespace detail { -namespace d1 { - -// keep throw in a separate function to prevent code bloat -template<typename E> -void throw_exception(const E &e) { -#if TBB_USE_EXCEPTIONS - throw e; -#else - suppress_unused_warning(e); -#endif -} - -template<typename T> -class scalable_allocator { -public: - using value_type = T; - using propagate_on_container_move_assignment = std::true_type; - - //! Always defined for TBB containers - using is_always_equal = std::true_type; - - scalable_allocator() = default; - template<typename U> scalable_allocator(const scalable_allocator<U>&) noexcept {} - - //! Allocate space for n objects. - __TBB_nodiscard T* allocate(std::size_t n) { - T* p = static_cast<T*>(scalable_malloc(n * sizeof(value_type))); - if (!p) { - throw_exception(std::bad_alloc()); - } - return p; - } - - //! Free previously allocated block of memory - void deallocate(T* p, std::size_t) { - scalable_free(p); - } - -#if TBB_ALLOCATOR_TRAITS_BROKEN - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using difference_type = std::ptrdiff_t; - using size_type = std::size_t; - template<typename U> struct rebind { - using other = scalable_allocator<U>; - }; - //! Largest value for which method allocate might succeed. - size_type max_size() const noexcept { - size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type); - return (absolutemax > 0 ? absolutemax : 1); - } - template<typename U, typename... Args> - void construct(U *p, Args&&... 
args) - { ::new((void *)p) U(std::forward<Args>(args)...); } - void destroy(pointer p) { p->~value_type(); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } -#endif // TBB_ALLOCATOR_TRAITS_BROKEN - -}; - -#if TBB_ALLOCATOR_TRAITS_BROKEN - template<> - class scalable_allocator<void> { - public: - using pointer = void*; - using const_pointer = const void*; - using value_type = void; - template<typename U> struct rebind { - using other = scalable_allocator<U>; - }; - }; -#endif - -template<typename T, typename U> -inline bool operator==(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return true; } - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template<typename T, typename U> -inline bool operator!=(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return false; } -#endif - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT - -//! C++17 memory resource implementation for scalable allocator -//! ISO C++ Section 23.12.2 -class scalable_resource_impl : public std::pmr::memory_resource { -private: - void* do_allocate(std::size_t bytes, std::size_t alignment) override { - void* p = scalable_aligned_malloc(bytes, alignment); - if (!p) { - throw_exception(std::bad_alloc()); - } - return p; - } - - void do_deallocate(void* ptr, std::size_t /*bytes*/, std::size_t /*alignment*/) override { - scalable_free(ptr); - } - - //! Memory allocated by one instance of scalable_resource_impl could be deallocated by any - //! other instance of this class - bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { - return this == &other || -#if __TBB_USE_OPTIONAL_RTTI - dynamic_cast<const scalable_resource_impl*>(&other) != nullptr; -#else - false; -#endif - } -}; - -//! Global scalable allocator memory resource provider -inline std::pmr::memory_resource* scalable_memory_resource() noexcept { - static tbb::detail::d1::scalable_resource_impl scalable_res; - return &scalable_res; -} - -#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::scalable_allocator; -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -using detail::d1::scalable_memory_resource; -#endif -} // namespace v1 - -} // namespace tbb - -#endif /* __cplusplus */ - -#endif /* __TBB_scalable_allocator_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_scalable_allocator_H +#define __TBB_scalable_allocator_H + +#ifdef __cplusplus +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" +#include <cstdlib> +#include <utility> +#else +#include <stddef.h> /* Need ptrdiff_t and size_t from here. */ +#if !_MSC_VER +#include <stdint.h> /* Need intptr_t from here. 
*/ +#endif +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#if _MSC_VER + #define __TBB_EXPORTED_FUNC __cdecl +#else + #define __TBB_EXPORTED_FUNC +#endif + +/** The "malloc" analogue to allocate block of memory of size bytes. + * @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_malloc(size_t size); + +/** The "free" analogue to discard a previously allocated piece of memory. + @ingroup memory_allocation */ +void __TBB_EXPORTED_FUNC scalable_free(void* ptr); + +/** The "realloc" analogue complementing scalable_malloc. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_realloc(void* ptr, size_t size); + +/** The "calloc" analogue complementing scalable_malloc. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_calloc(size_t nobj, size_t size); + +/** The "posix_memalign" analogue. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_posix_memalign(void** memptr, size_t alignment, size_t size); + +/** The "_aligned_malloc" analogue. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_aligned_malloc(size_t size, size_t alignment); + +/** The "_aligned_realloc" analogue. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_aligned_realloc(void* ptr, size_t size, size_t alignment); + +/** The "_aligned_free" analogue. + @ingroup memory_allocation */ +void __TBB_EXPORTED_FUNC scalable_aligned_free(void* ptr); + +/** The analogue of _msize/malloc_size/malloc_usable_size. + Returns the usable size of a memory block previously allocated by scalable_*, + or 0 (zero) if ptr does not point to such a block. + @ingroup memory_allocation */ +size_t __TBB_EXPORTED_FUNC scalable_msize(void* ptr); + +/* Results for scalable_allocation_* functions */ +typedef enum { + TBBMALLOC_OK, + TBBMALLOC_INVALID_PARAM, + TBBMALLOC_UNSUPPORTED, + TBBMALLOC_NO_MEMORY, + TBBMALLOC_NO_EFFECT +} ScalableAllocationResult; + +/* Setting TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages. + scalable_allocation_mode call has priority over environment variable. */ +typedef enum { + TBBMALLOC_USE_HUGE_PAGES, /* value turns using huge pages on and off */ + /* deprecated, kept for backward compatibility only */ + USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES, + /* try to limit memory consumption value (Bytes), clean internal buffers + if limit is exceeded, but not prevents from requesting memory from OS */ + TBBMALLOC_SET_SOFT_HEAP_LIMIT, + /* Lower bound for the size (Bytes), that is interpreted as huge + * and not released during regular cleanup operations. */ + TBBMALLOC_SET_HUGE_SIZE_THRESHOLD +} AllocationModeParam; + +/** Set TBB allocator-specific allocation modes. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value); + +typedef enum { + /* Clean internal allocator buffers for all threads. + Returns TBBMALLOC_NO_EFFECT if no buffers cleaned, + TBBMALLOC_OK if some memory released from buffers. */ + TBBMALLOC_CLEAN_ALL_BUFFERS, + /* Clean internal allocator buffer for current thread only. + Return values same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */ + TBBMALLOC_CLEAN_THREAD_BUFFERS +} ScalableAllocationCmd; + +/** Call TBB allocator-specific commands. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#ifdef __cplusplus + +//! 
The namespace rml contains components of low-level memory pool interface. +namespace rml { +class MemoryPool; + +typedef void *(*rawAllocType)(std::intptr_t pool_id, std::size_t &bytes); +// returns non-zero in case of error +typedef int (*rawFreeType)(std::intptr_t pool_id, void* raw_ptr, std::size_t raw_bytes); + +struct MemPoolPolicy { + enum { + TBBMALLOC_POOL_VERSION = 1 + }; + + rawAllocType pAlloc; + rawFreeType pFree; + // granularity of pAlloc allocations. 0 means default used. + std::size_t granularity; + int version; + // all memory consumed at 1st pAlloc call and never returned, + // no more pAlloc calls after 1st + unsigned fixedPool : 1, + // memory consumed but returned only at pool termination + keepAllMemory : 1, + reserved : 30; + + MemPoolPolicy(rawAllocType pAlloc_, rawFreeType pFree_, + std::size_t granularity_ = 0, bool fixedPool_ = false, + bool keepAllMemory_ = false) : + pAlloc(pAlloc_), pFree(pFree_), granularity(granularity_), version(TBBMALLOC_POOL_VERSION), + fixedPool(fixedPool_), keepAllMemory(keepAllMemory_), + reserved(0) {} +}; + +// enums have same values as appropriate enums from ScalableAllocationResult +// TODO: use ScalableAllocationResult in pool_create directly +enum MemPoolError { + // pool created successfully + POOL_OK = TBBMALLOC_OK, + // invalid policy parameters found + INVALID_POLICY = TBBMALLOC_INVALID_PARAM, + // requested pool policy is not supported by allocator library + UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED, + // lack of memory during pool creation + NO_MEMORY = TBBMALLOC_NO_MEMORY, + // action takes no effect + NO_EFFECT = TBBMALLOC_NO_EFFECT +}; + +MemPoolError pool_create_v1(std::intptr_t pool_id, const MemPoolPolicy *policy, + rml::MemoryPool **pool); + +bool pool_destroy(MemoryPool* memPool); +void *pool_malloc(MemoryPool* memPool, std::size_t size); +void *pool_realloc(MemoryPool* memPool, void *object, std::size_t size); +void *pool_aligned_malloc(MemoryPool* mPool, std::size_t size, std::size_t alignment); +void *pool_aligned_realloc(MemoryPool* mPool, void *ptr, std::size_t size, std::size_t alignment); +bool pool_reset(MemoryPool* memPool); +bool pool_free(MemoryPool *memPool, void *object); +MemoryPool *pool_identify(void *object); +std::size_t pool_msize(MemoryPool *memPool, void *object); + +} // namespace rml + +namespace tbb { +namespace detail { +namespace d1 { + +// keep throw in a separate function to prevent code bloat +template<typename E> +void throw_exception(const E &e) { +#if TBB_USE_EXCEPTIONS + throw e; +#else + suppress_unused_warning(e); +#endif +} + +template<typename T> +class scalable_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers + using is_always_equal = std::true_type; + + scalable_allocator() = default; + template<typename U> scalable_allocator(const scalable_allocator<U>&) noexcept {} + + //! Allocate space for n objects. + __TBB_nodiscard T* allocate(std::size_t n) { + T* p = static_cast<T*>(scalable_malloc(n * sizeof(value_type))); + if (!p) { + throw_exception(std::bad_alloc()); + } + return p; + } + + //! 
Free previously allocated block of memory + void deallocate(T* p, std::size_t) { + scalable_free(p); + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = scalable_allocator<U>; + }; + //! Largest value for which method allocate might succeed. + size_type max_size() const noexcept { + size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type); + return (absolutemax > 0 ? absolutemax : 1); + } + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new((void *)p) U(std::forward<Args>(args)...); } + void destroy(pointer p) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN + +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class scalable_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = scalable_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +inline bool operator==(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +inline bool operator!=(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return false; } +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +//! C++17 memory resource implementation for scalable allocator +//! ISO C++ Section 23.12.2 +class scalable_resource_impl : public std::pmr::memory_resource { +private: + void* do_allocate(std::size_t bytes, std::size_t alignment) override { + void* p = scalable_aligned_malloc(bytes, alignment); + if (!p) { + throw_exception(std::bad_alloc()); + } + return p; + } + + void do_deallocate(void* ptr, std::size_t /*bytes*/, std::size_t /*alignment*/) override { + scalable_free(ptr); + } + + //! Memory allocated by one instance of scalable_resource_impl could be deallocated by any + //! other instance of this class + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + return this == &other || +#if __TBB_USE_OPTIONAL_RTTI + dynamic_cast<const scalable_resource_impl*>(&other) != nullptr; +#else + false; +#endif + } +}; + +//! 
Global scalable allocator memory resource provider +inline std::pmr::memory_resource* scalable_memory_resource() noexcept { + static tbb::detail::d1::scalable_resource_impl scalable_res; + return &scalable_res; +} + +#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::scalable_allocator; +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +using detail::d1::scalable_memory_resource; +#endif +} // namespace v1 + +} // namespace tbb + +#endif /* __cplusplus */ + +#endif /* __TBB_scalable_allocator_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h index 7fde7e15af..aa9bcb6fd6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h @@ -1,179 +1,179 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_spin_mutex_H -#define __TBB_spin_mutex_H - -#include "detail/_namespace_injection.h" - -#include "profiling.h" - -#include "detail/_assert.h" -#include "detail/_utils.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace d1 { - -#if __TBB_TSX_INTRINSICS_PRESENT -class rtm_mutex; -#endif - -/** A spin_mutex is a low-level synchronization primitive. - While locked, it causes the waiting threads to spin in a loop until the lock is released. - It should be used only for locking short critical sections - (typically less than 20 instructions) when fairness is not an issue. - If zero-initialized, the mutex is considered unheld. - @ingroup synchronization */ -class spin_mutex { -public: - //! Constructors - spin_mutex() noexcept : m_flag(false) { - create_itt_sync(this, "tbb::spin_mutex", ""); - }; - - //! Destructor - ~spin_mutex() = default; - - //! No Copy - spin_mutex(const spin_mutex&) = delete; - spin_mutex& operator=(const spin_mutex&) = delete; - - //! Represents acquisition of a mutex. - class scoped_lock { - //! Points to currently held mutex, or NULL if no lock is held. - spin_mutex* m_mutex; - - public: - //! Construct without acquiring a mutex. - constexpr scoped_lock() noexcept : m_mutex(nullptr) {} - - //! Construct and acquire lock on a mutex. - scoped_lock(spin_mutex& m) { - acquire(m); - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock. - void acquire(spin_mutex& m) { - m_mutex = &m; - m.lock(); - } - - //! Try acquiring lock (non-blocking) - /** Return true if lock acquired; false otherwise. */ - bool try_acquire(spin_mutex& m) { - bool result = m.try_lock(); - if (result) { - m_mutex = &m; - } - return result; - } - - //! Release lock - void release() { - __TBB_ASSERT(m_mutex, "release on spin_mutex::scoped_lock that is not holding a lock"); - m_mutex->unlock(); - m_mutex = nullptr; - } - - //! Destroy lock. If holding a lock, releases the lock first. - ~scoped_lock() { - if (m_mutex) { - release(); - } - } - }; - - //! 
Mutex traits - static constexpr bool is_rw_mutex = false; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = false; - - //! Acquire lock - /** Spin if the lock is taken */ - void lock() { - atomic_backoff backoff; - call_itt_notify(prepare, this); - while (m_flag.exchange(true)) backoff.pause(); - call_itt_notify(acquired, this); - } - - //! Try acquiring lock (non-blocking) - /** Return true if lock acquired; false otherwise. */ - bool try_lock() { - bool result = !m_flag.exchange(true); - if (result) { - call_itt_notify(acquired, this); - } - return result; - } - - //! Release lock - void unlock() { - call_itt_notify(releasing, this); - m_flag.store(false, std::memory_order_release); - } - -protected: - std::atomic<bool> m_flag; -}; // class spin_mutex - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(spin_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(spin_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif //WIN -#else -inline void set_name(spin_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(spin_mutex&, const wchar_t*) {} -#endif // WIN -#endif -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::spin_mutex; -} // namespace v1 -namespace profiling { - using detail::d1::set_name; -} -} // namespace tbb - -#include "detail/_rtm_mutex.h" - -namespace tbb { -inline namespace v1 { -#if __TBB_TSX_INTRINSICS_PRESENT - using speculative_spin_mutex = detail::d1::rtm_mutex; -#else - using speculative_spin_mutex = detail::d1::spin_mutex; -#endif -} -} - -#endif /* __TBB_spin_mutex_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_spin_mutex_H +#define __TBB_spin_mutex_H + +#include "detail/_namespace_injection.h" + +#include "profiling.h" + +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +#if __TBB_TSX_INTRINSICS_PRESENT +class rtm_mutex; +#endif + +/** A spin_mutex is a low-level synchronization primitive. + While locked, it causes the waiting threads to spin in a loop until the lock is released. + It should be used only for locking short critical sections + (typically less than 20 instructions) when fairness is not an issue. + If zero-initialized, the mutex is considered unheld. + @ingroup synchronization */ +class spin_mutex { +public: + //! Constructors + spin_mutex() noexcept : m_flag(false) { + create_itt_sync(this, "tbb::spin_mutex", ""); + }; + + //! Destructor + ~spin_mutex() = default; + + //! No Copy + spin_mutex(const spin_mutex&) = delete; + spin_mutex& operator=(const spin_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + //! Points to currently held mutex, or NULL if no lock is held. + spin_mutex* m_mutex; + + public: + //! Construct without acquiring a mutex. 
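        /** Equivalent to zero-initialization of *this; acquire() or try_acquire() can be called later. */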
+ constexpr scoped_lock() noexcept : m_mutex(nullptr) {} + + //! Construct and acquire lock on a mutex. + scoped_lock(spin_mutex& m) { + acquire(m); + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock. + void acquire(spin_mutex& m) { + m_mutex = &m; + m.lock(); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_acquire(spin_mutex& m) { + bool result = m.try_lock(); + if (result) { + m_mutex = &m; + } + return result; + } + + //! Release lock + void release() { + __TBB_ASSERT(m_mutex, "release on spin_mutex::scoped_lock that is not holding a lock"); + m_mutex->unlock(); + m_mutex = nullptr; + } + + //! Destroy lock. If holding a lock, releases the lock first. + ~scoped_lock() { + if (m_mutex) { + release(); + } + } + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + + //! Acquire lock + /** Spin if the lock is taken */ + void lock() { + atomic_backoff backoff; + call_itt_notify(prepare, this); + while (m_flag.exchange(true)) backoff.pause(); + call_itt_notify(acquired, this); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_lock() { + bool result = !m_flag.exchange(true); + if (result) { + call_itt_notify(acquired, this); + } + return result; + } + + //! Release lock + void unlock() { + call_itt_notify(releasing, this); + m_flag.store(false, std::memory_order_release); + } + +protected: + std::atomic<bool> m_flag; +}; // class spin_mutex + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(spin_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(spin_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_mutex&, const wchar_t*) {} +#endif // WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::spin_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#include "detail/_rtm_mutex.h" + +namespace tbb { +inline namespace v1 { +#if __TBB_TSX_INTRINSICS_PRESENT + using speculative_spin_mutex = detail::d1::rtm_mutex; +#else + using speculative_spin_mutex = detail::d1::spin_mutex; +#endif +} +} + +#endif /* __TBB_spin_mutex_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h index baf6b24b56..d3a43be30a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h @@ -1,307 +1,307 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_spin_rw_mutex_H -#define __TBB_spin_rw_mutex_H - -#include "detail/_namespace_injection.h" - -#include "profiling.h" - -#include "detail/_assert.h" -#include "detail/_utils.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace d1 { - -#if __TBB_TSX_INTRINSICS_PRESENT -class rtm_rw_mutex; -#endif - -//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference -/** @ingroup synchronization */ -class spin_rw_mutex { -public: - //! Constructors - spin_rw_mutex() noexcept : m_state(0) { - create_itt_sync(this, "tbb::spin_rw_mutex", ""); - } - - //! Destructor - ~spin_rw_mutex() { - __TBB_ASSERT(!m_state, "destruction of an acquired mutex"); - } - - //! No Copy - spin_rw_mutex(const spin_rw_mutex&) = delete; - spin_rw_mutex& operator=(const spin_rw_mutex&) = delete; - - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock { - public: - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - constexpr scoped_lock() noexcept : m_mutex(nullptr), m_is_writer(false) {} - - //! Acquire lock on given mutex. - scoped_lock(spin_rw_mutex& m, bool write = true) : m_mutex(nullptr) { - acquire(m, write); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if (m_mutex) { - release(); - } - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - void acquire(spin_rw_mutex& m, bool write = true) { - m_is_writer = write; - m_mutex = &m; - if (write) { - m_mutex->lock(); - } else { - m_mutex->lock_shared(); - } - } - - //! Try acquire lock on given mutex. - bool try_acquire(spin_rw_mutex& m, bool write = true) { - m_is_writer = write; - bool result = write ? m.try_lock() : m.try_lock_shared(); - if (result) { - m_mutex = &m; - } - return result; - } - - //! Release lock. - void release() { - spin_rw_mutex* m = m_mutex; - m_mutex = nullptr; - - if (m_is_writer) { - m->unlock(); - } else { - m->unlock_shared(); - } - } - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - bool upgrade_to_writer() { - if (m_is_writer) return true; // Already a writer - m_is_writer = true; - return m_mutex->upgrade(); - } - - //! Downgrade writer to become a reader. - bool downgrade_to_reader() { - if (!m_is_writer) return true; // Already a reader - m_mutex->downgrade(); - m_is_writer = false; - return true; - } - - protected: - //! The pointer to the current mutex that is held, or nullptr if no mutex is held. - spin_rw_mutex* m_mutex; - - //! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock. - /** Not defined if not holding a lock. */ - bool m_is_writer; - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = true; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = false; - - //! Acquire lock - void lock() { - call_itt_notify(prepare, this); - for (atomic_backoff backoff; ; backoff.pause()) { - state_type s = m_state.load(std::memory_order_relaxed); - if (!(s & BUSY)) { // no readers, no writers - if (m_state.compare_exchange_strong(s, WRITER)) - break; // successfully stored writer flag - backoff.reset(); // we could be very close to complete op. 
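            // Otherwise the mutex is busy: the branch below raises WRITER_PENDING
            // so that newly arriving readers back off while this writer waits.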
- } else if (!(s & WRITER_PENDING)) { // no pending writers - m_state |= WRITER_PENDING; - } - } - call_itt_notify(acquired, this); - } - - //! Try acquiring lock (non-blocking) - /** Return true if lock acquired; false otherwise. */ - bool try_lock() { - // for a writer: only possible to acquire if no active readers or writers - state_type s = m_state.load(std::memory_order_relaxed); - if (!(s & BUSY)) { // no readers, no writers; mask is 1..1101 - if (m_state.compare_exchange_strong(s, WRITER)) { - call_itt_notify(acquired, this); - return true; // successfully stored writer flag - } - } - return false; - } - - //! Release lock - void unlock() { - call_itt_notify(releasing, this); - m_state &= READERS; - } - - //! Lock shared ownership mutex - void lock_shared() { - call_itt_notify(prepare, this); - for (atomic_backoff b; ; b.pause()) { - state_type s = m_state.load(std::memory_order_relaxed); - if (!(s & (WRITER | WRITER_PENDING))) { // no writer or write requests - state_type prev_state = m_state.fetch_add(ONE_READER); - if (!(prev_state & WRITER)) { - break; // successfully stored increased number of readers - } - // writer got there first, undo the increment - m_state -= ONE_READER; - } - } - call_itt_notify(acquired, this); - __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); - } - - //! Try lock shared ownership mutex - bool try_lock_shared() { - // for a reader: acquire if no active or waiting writers - state_type s = m_state.load(std::memory_order_relaxed); - if (!(s & (WRITER | WRITER_PENDING))) { // no writers - state_type prev_state = m_state.fetch_add(ONE_READER); - if (!(prev_state & WRITER)) { // got the lock - call_itt_notify(acquired, this); - return true; // successfully stored increased number of readers - } - // writer got there first, undo the increment - m_state -= ONE_READER; - } - return false; - } - - //! Unlock shared ownership mutex - void unlock_shared() { - __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); - call_itt_notify(releasing, this); - m_state -= ONE_READER; - } - -protected: - /** Internal non ISO C++ standard API **/ - //! This API is used through the scoped_lock class - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - bool upgrade() { - state_type s = m_state.load(std::memory_order_relaxed); - __TBB_ASSERT(s & READERS, "invalid state before upgrade: no readers "); - // Check and set writer-pending flag. - // Required conditions: either no pending writers, or we are the only reader - // (with multiple readers and pending writer, another upgrade could have been requested) - while ((s & READERS) == ONE_READER || !(s & WRITER_PENDING)) { - if (m_state.compare_exchange_strong(s, s | WRITER | WRITER_PENDING)) { - atomic_backoff backoff; - while ((m_state.load(std::memory_order_relaxed) & READERS) != ONE_READER) backoff.pause(); - __TBB_ASSERT((m_state & (WRITER_PENDING|WRITER)) == (WRITER_PENDING | WRITER), "invalid state when upgrading to writer"); - // Both new readers and writers are blocked at this time - m_state -= (ONE_READER + WRITER_PENDING); - return true; // successfully upgraded - } - } - // Slow reacquire - unlock_shared(); - lock(); - return false; - } - - //! 
Downgrade writer to a reader - void downgrade() { - call_itt_notify(releasing, this); - m_state += (ONE_READER - WRITER); - __TBB_ASSERT(m_state & READERS, "invalid state after downgrade: no readers"); - } - - using state_type = std::intptr_t; - static constexpr state_type WRITER = 1; - static constexpr state_type WRITER_PENDING = 2; - static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); - static constexpr state_type ONE_READER = 4; - static constexpr state_type BUSY = WRITER | READERS; - //! State of lock - /** Bit 0 = writer is holding lock - Bit 1 = request by a writer to acquire lock (hint to readers to wait) - Bit 2..N = number of readers holding lock */ - std::atomic<state_type> m_state; -}; // class spin_rw_mutex - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(spin_rw_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(spin_rw_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif // WIN -#else -inline void set_name(spin_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(spin_rw_mutex&, const wchar_t*) {} -#endif // WIN -#endif -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::spin_rw_mutex; -} // namespace v1 -namespace profiling { - using detail::d1::set_name; -} -} // namespace tbb - -#include "detail/_rtm_rw_mutex.h" - -namespace tbb { -inline namespace v1 { -#if __TBB_TSX_INTRINSICS_PRESENT - using speculative_spin_rw_mutex = detail::d1::rtm_rw_mutex; -#else - using speculative_spin_rw_mutex = detail::d1::spin_rw_mutex; -#endif -} -} - -#endif /* __TBB_spin_rw_mutex_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_spin_rw_mutex_H +#define __TBB_spin_rw_mutex_H + +#include "detail/_namespace_injection.h" + +#include "profiling.h" + +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +#if __TBB_TSX_INTRINSICS_PRESENT +class rtm_rw_mutex; +#endif + +//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference +/** @ingroup synchronization */ +class spin_rw_mutex { +public: + //! Constructors + spin_rw_mutex() noexcept : m_state(0) { + create_itt_sync(this, "tbb::spin_rw_mutex", ""); + } + + //! Destructor + ~spin_rw_mutex() { + __TBB_ASSERT(!m_state, "destruction of an acquired mutex"); + } + + //! No Copy + spin_rw_mutex(const spin_rw_mutex&) = delete; + spin_rw_mutex& operator=(const spin_rw_mutex&) = delete; + + //! The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() noexcept : m_mutex(nullptr), m_is_writer(false) {} + + //! Acquire lock on given mutex. 
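        /** write==true requests exclusive (writer) ownership, write==false requests shared (reader) ownership. */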
+ scoped_lock(spin_rw_mutex& m, bool write = true) : m_mutex(nullptr) { + acquire(m, write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if (m_mutex) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire(spin_rw_mutex& m, bool write = true) { + m_is_writer = write; + m_mutex = &m; + if (write) { + m_mutex->lock(); + } else { + m_mutex->lock_shared(); + } + } + + //! Try acquire lock on given mutex. + bool try_acquire(spin_rw_mutex& m, bool write = true) { + m_is_writer = write; + bool result = write ? m.try_lock() : m.try_lock_shared(); + if (result) { + m_mutex = &m; + } + return result; + } + + //! Release lock. + void release() { + spin_rw_mutex* m = m_mutex; + m_mutex = nullptr; + + if (m_is_writer) { + m->unlock(); + } else { + m->unlock_shared(); + } + } + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade_to_writer() { + if (m_is_writer) return true; // Already a writer + m_is_writer = true; + return m_mutex->upgrade(); + } + + //! Downgrade writer to become a reader. + bool downgrade_to_reader() { + if (!m_is_writer) return true; // Already a reader + m_mutex->downgrade(); + m_is_writer = false; + return true; + } + + protected: + //! The pointer to the current mutex that is held, or nullptr if no mutex is held. + spin_rw_mutex* m_mutex; + + //! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock. + /** Not defined if not holding a lock. */ + bool m_is_writer; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + + //! Acquire lock + void lock() { + call_itt_notify(prepare, this); + for (atomic_backoff backoff; ; backoff.pause()) { + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & BUSY)) { // no readers, no writers + if (m_state.compare_exchange_strong(s, WRITER)) + break; // successfully stored writer flag + backoff.reset(); // we could be very close to complete op. + } else if (!(s & WRITER_PENDING)) { // no pending writers + m_state |= WRITER_PENDING; + } + } + call_itt_notify(acquired, this); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_lock() { + // for a writer: only possible to acquire if no active readers or writers + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & BUSY)) { // no readers, no writers; mask is 1..1101 + if (m_state.compare_exchange_strong(s, WRITER)) { + call_itt_notify(acquired, this); + return true; // successfully stored writer flag + } + } + return false; + } + + //! Release lock + void unlock() { + call_itt_notify(releasing, this); + m_state &= READERS; + } + + //! 
Lock shared ownership mutex + void lock_shared() { + call_itt_notify(prepare, this); + for (atomic_backoff b; ; b.pause()) { + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & (WRITER | WRITER_PENDING))) { // no writer or write requests + state_type prev_state = m_state.fetch_add(ONE_READER); + if (!(prev_state & WRITER)) { + break; // successfully stored increased number of readers + } + // writer got there first, undo the increment + m_state -= ONE_READER; + } + } + call_itt_notify(acquired, this); + __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); + } + + //! Try lock shared ownership mutex + bool try_lock_shared() { + // for a reader: acquire if no active or waiting writers + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & (WRITER | WRITER_PENDING))) { // no writers + state_type prev_state = m_state.fetch_add(ONE_READER); + if (!(prev_state & WRITER)) { // got the lock + call_itt_notify(acquired, this); + return true; // successfully stored increased number of readers + } + // writer got there first, undo the increment + m_state -= ONE_READER; + } + return false; + } + + //! Unlock shared ownership mutex + void unlock_shared() { + __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); + call_itt_notify(releasing, this); + m_state -= ONE_READER; + } + +protected: + /** Internal non ISO C++ standard API **/ + //! This API is used through the scoped_lock class + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade() { + state_type s = m_state.load(std::memory_order_relaxed); + __TBB_ASSERT(s & READERS, "invalid state before upgrade: no readers "); + // Check and set writer-pending flag. + // Required conditions: either no pending writers, or we are the only reader + // (with multiple readers and pending writer, another upgrade could have been requested) + while ((s & READERS) == ONE_READER || !(s & WRITER_PENDING)) { + if (m_state.compare_exchange_strong(s, s | WRITER | WRITER_PENDING)) { + atomic_backoff backoff; + while ((m_state.load(std::memory_order_relaxed) & READERS) != ONE_READER) backoff.pause(); + __TBB_ASSERT((m_state & (WRITER_PENDING|WRITER)) == (WRITER_PENDING | WRITER), "invalid state when upgrading to writer"); + // Both new readers and writers are blocked at this time + m_state -= (ONE_READER + WRITER_PENDING); + return true; // successfully upgraded + } + } + // Slow reacquire + unlock_shared(); + lock(); + return false; + } + + //! Downgrade writer to a reader + void downgrade() { + call_itt_notify(releasing, this); + m_state += (ONE_READER - WRITER); + __TBB_ASSERT(m_state & READERS, "invalid state after downgrade: no readers"); + } + + using state_type = std::intptr_t; + static constexpr state_type WRITER = 1; + static constexpr state_type WRITER_PENDING = 2; + static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); + static constexpr state_type ONE_READER = 4; + static constexpr state_type BUSY = WRITER | READERS; + //! 
State of lock + /** Bit 0 = writer is holding lock + Bit 1 = request by a writer to acquire lock (hint to readers to wait) + Bit 2..N = number of readers holding lock */ + std::atomic<state_type> m_state; +}; // class spin_rw_mutex + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(spin_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(spin_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_rw_mutex&, const wchar_t*) {} +#endif // WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::spin_rw_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#include "detail/_rtm_rw_mutex.h" + +namespace tbb { +inline namespace v1 { +#if __TBB_TSX_INTRINSICS_PRESENT + using speculative_spin_rw_mutex = detail::d1::rtm_rw_mutex; +#else + using speculative_spin_rw_mutex = detail::d1::spin_rw_mutex; +#endif +} +} + +#endif /* __TBB_spin_rw_mutex_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/task.h b/contrib/libs/tbb/include/oneapi/tbb/task.h index 82ce1df6cd..1d242e4cc3 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task.h @@ -1,37 +1,37 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_task_H -#define __TBB_task_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_task.h" - -namespace tbb { -inline namespace v1 { -namespace task { -#if __TBB_RESUMABLE_TASKS - using detail::d1::suspend_point; - using detail::d1::resume; - using detail::d1::suspend; -#endif /* __TBB_RESUMABLE_TASKS */ - using detail::d1::current_context; -} // namespace task -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_task_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_task_H +#define __TBB_task_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_task.h" + +namespace tbb { +inline namespace v1 { +namespace task { +#if __TBB_RESUMABLE_TASKS + using detail::d1::suspend_point; + using detail::d1::resume; + using detail::d1::suspend; +#endif /* __TBB_RESUMABLE_TASKS */ + using detail::d1::current_context; +} // namespace task +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_task_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h index f1d0f9dea3..b83c5d7866 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h @@ -1,452 +1,452 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_task_arena_H -#define __TBB_task_arena_H - -#include "detail/_namespace_injection.h" -#include "detail/_task.h" -#include "detail/_exception.h" -#include "detail/_aligned_space.h" -#include "detail/_small_object_pool.h" - -#if __TBB_ARENA_BINDING -#include "info.h" -#endif /*__TBB_ARENA_BINDING*/ - -namespace tbb { -namespace detail { - -namespace d1 { - -template<typename F, typename R> -class task_arena_function : public delegate_base { - F &my_func; - aligned_space<R> my_return_storage; - bool my_constructed{false}; - // The function should be called only once. - bool operator()() const override { - new (my_return_storage.begin()) R(my_func()); - return true; - } -public: - task_arena_function(F& f) : my_func(f) {} - // The function can be called only after operator() and only once. 
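For reference, a minimal usage sketch of the reader-writer lock declared in the spin_rw_mutex.h diff above. The shared table and the helper functions are illustrative only and are not part of this change; the include path follows the oneapi/tbb layout used in this directory.

    #include <map>
    #include <string>
    #include "oneapi/tbb/spin_rw_mutex.h"

    // Illustrative shared state guarded by the mutex from the header above.
    static std::map<std::string, int> g_table;
    static tbb::spin_rw_mutex g_table_mutex;

    int lookup(const std::string& key) {
        // write = false takes shared (reader) ownership.
        tbb::spin_rw_mutex::scoped_lock lock(g_table_mutex, /*write=*/false);
        auto it = g_table.find(key);
        return it == g_table.end() ? -1 : it->second;
    }

    void insert_if_absent(const std::string& key, int value) {
        tbb::spin_rw_mutex::scoped_lock lock(g_table_mutex, /*write=*/false);
        if (g_table.count(key) == 0) {
            // upgrade_to_writer() returns false when the lock had to be
            // released and re-acquired, so the check must be repeated.
            if (!lock.upgrade_to_writer() && g_table.count(key) != 0) {
                return;
            }
            g_table.emplace(key, value);
        }
    }

The repeated check after a failed upgrade mirrors the upgrade() slow path shown in the header: when another writer gets in first, the reader lock is dropped and re-taken, so any state observed before the upgrade may be stale.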
- R consume_result() { - my_constructed = true; - return std::move(*(my_return_storage.begin())); - } - ~task_arena_function() override { - if (my_constructed) { - my_return_storage.begin()->~R(); - } - } -}; - -template<typename F> -class task_arena_function<F,void> : public delegate_base { - F &my_func; - bool operator()() const override { - my_func(); - return true; - } -public: - task_arena_function(F& f) : my_func(f) {} - void consume_result() const {} - - friend class task_arena_base; -}; - -class task_arena_base; -class task_scheduler_observer; -} // namespace d1 - -namespace r1 { -class arena; -struct task_arena_impl; - -void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool); -void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base&); -void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base&); -bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base&); -void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); -void __TBB_EXPORTED_FUNC wait(d1::task_arena_base&); -int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base*); -void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base& d, std::intptr_t); - -void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_arena_base*); -void __TBB_EXPORTED_FUNC submit(d1::task&, d1::task_group_context&, arena*, std::uintptr_t); -} // namespace r1 - -namespace d1 { - -static constexpr int priority_stride = INT_MAX / 4; - -class task_arena_base { - friend struct r1::task_arena_impl; - friend void r1::observe(d1::task_scheduler_observer&, bool); -public: - enum class priority : int { - low = 1 * priority_stride, - normal = 2 * priority_stride, - high = 3 * priority_stride - }; -#if __TBB_ARENA_BINDING - using constraints = tbb::detail::d1::constraints; -#endif /*__TBB_ARENA_BINDING*/ -protected: - //! Special settings - intptr_t my_version_and_traits; - - std::atomic<do_once_state> my_initialization_state; - - //! NULL if not currently initialized. - std::atomic<r1::arena*> my_arena; - static_assert(sizeof(std::atomic<r1::arena*>) == sizeof(r1::arena*), - "To preserve backward compatibility we need the equal size of an atomic pointer and a pointer"); - - //! Concurrency level for deferred initialization - int my_max_concurrency; - - //! Reserved slots for external threads - unsigned my_num_reserved_slots; - - //! Arena priority - priority my_priority; - - //! The NUMA node index to which the arena will be attached - numa_node_id my_numa_id; - - //! The core type index to which arena will be attached - core_type_id my_core_type; - - //! Number of threads per core - int my_max_threads_per_core; - - // Backward compatibility checks. - core_type_id core_type() const { - return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? my_core_type : automatic; - } - int max_threads_per_core() const { - return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? 
my_max_threads_per_core : automatic; - } - - enum { - default_flags = 0 - , core_type_support_flag = 1 - }; - - task_arena_base(int max_concurrency, unsigned reserved_for_masters, priority a_priority) - : my_version_and_traits(default_flags | core_type_support_flag) - , my_initialization_state(do_once_state::uninitialized) - , my_arena(nullptr) - , my_max_concurrency(max_concurrency) - , my_num_reserved_slots(reserved_for_masters) - , my_priority(a_priority) - , my_numa_id(automatic) - , my_core_type(automatic) - , my_max_threads_per_core(automatic) - {} - -#if __TBB_ARENA_BINDING - task_arena_base(const constraints& constraints_, unsigned reserved_for_masters, priority a_priority) - : my_version_and_traits(default_flags | core_type_support_flag) - , my_initialization_state(do_once_state::uninitialized) - , my_arena(nullptr) - , my_max_concurrency(constraints_.max_concurrency) - , my_num_reserved_slots(reserved_for_masters) - , my_priority(a_priority) - , my_numa_id(constraints_.numa_id) -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - , my_core_type(constraints_.core_type) - , my_max_threads_per_core(constraints_.max_threads_per_core) -#else - , my_core_type(automatic) - , my_max_threads_per_core(automatic) -#endif - {} -#endif /*__TBB_ARENA_BINDING*/ -public: - //! Typedef for number of threads that is automatic. - static const int automatic = -1; - static const int not_initialized = -2; -}; - -template<typename R, typename F> -R isolate_impl(F& f) { - task_arena_function<F, R> func(f); - r1::isolate_within_arena(func, /*isolation*/ 0); - return func.consume_result(); -} - -/** 1-to-1 proxy representation class of scheduler's arena - * Constructors set up settings only, real construction is deferred till the first method invocation - * Destructor only removes one of the references to the inner arena representation. - * Final destruction happens when all the references (and the work) are gone. - */ -class task_arena : public task_arena_base { - - template <typename F> - class enqueue_task : public task { - small_object_allocator m_allocator; - const F m_func; - - void finalize(const execution_data& ed) { - m_allocator.delete_object(this, ed); - } - task* execute(execution_data& ed) override { - m_func(); - finalize(ed); - return nullptr; - } - task* cancel(execution_data&) override { - __TBB_ASSERT_RELEASE(false, "Unhandled exception from enqueue task is caught"); - return nullptr; - } - public: - enqueue_task(const F& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(f) {} - enqueue_task(F&& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(std::move(f)) {} - }; - - void mark_initialized() { - __TBB_ASSERT( my_arena.load(std::memory_order_relaxed), "task_arena initialization is incomplete" ); - my_initialization_state.store(do_once_state::initialized, std::memory_order_release); - } - - template<typename F> - void enqueue_impl(F&& f) { - initialize(); - small_object_allocator alloc{}; - r1::enqueue(*alloc.new_object<enqueue_task<typename std::decay<F>::type>>(std::forward<F>(f), alloc), this); - } - - template<typename R, typename F> - R execute_impl(F& f) { - initialize(); - task_arena_function<F, R> func(f); - r1::execute(*this, func); - return func.consume_result(); - } -public: - //! 
Creates task_arena with certain concurrency limits - /** Sets up settings only, real construction is deferred till the first method invocation - * @arg max_concurrency specifies total number of slots in arena where threads work - * @arg reserved_for_masters specifies number of slots to be used by external threads only. - * Value of 1 is default and reflects behavior of implicit arenas. - **/ - task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1, - priority a_priority = priority::normal) - : task_arena_base(max_concurrency_, reserved_for_masters, a_priority) - {} - -#if __TBB_ARENA_BINDING - //! Creates task arena pinned to certain NUMA node - task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1, - priority a_priority = priority::normal) - : task_arena_base(constraints_, reserved_for_masters, a_priority) - {} - - //! Copies settings from another task_arena - task_arena(const task_arena &s) // copy settings but not the reference or instance - : task_arena_base( - constraints{} - .set_numa_id(s.my_numa_id) - .set_max_concurrency(s.my_max_concurrency) -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - .set_core_type(s.my_core_type) - .set_max_threads_per_core(s.my_max_threads_per_core) -#endif - , s.my_num_reserved_slots, s.my_priority) - {} -#else - //! Copies settings from another task_arena - task_arena(const task_arena& a) // copy settings but not the reference or instance - : task_arena_base(a.my_max_concurrency, a.my_num_reserved_slots, a.my_priority) - {} -#endif /*__TBB_ARENA_BINDING*/ - - //! Tag class used to indicate the "attaching" constructor - struct attach {}; - - //! Creates an instance of task_arena attached to the current arena of the thread - explicit task_arena( attach ) - : task_arena_base(automatic, 1, priority::normal) // use default settings if attach fails - { - if (r1::attach(*this)) { - mark_initialized(); - } - } - - //! Forces allocation of the resources for the task_arena as specified in constructor arguments - void initialize() { - atomic_do_once([this]{ r1::initialize(*this); }, my_initialization_state); - } - - //! Overrides concurrency level and forces initialization of internal representation - void initialize(int max_concurrency_, unsigned reserved_for_masters = 1, - priority a_priority = priority::normal) - { - __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); - if( !is_active() ) { - my_max_concurrency = max_concurrency_; - my_num_reserved_slots = reserved_for_masters; - my_priority = a_priority; - r1::initialize(*this); - mark_initialized(); - } - } - -#if __TBB_ARENA_BINDING - void initialize(constraints constraints_, unsigned reserved_for_masters = 1, - priority a_priority = priority::normal) - { - __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); - if( !is_active() ) { - my_numa_id = constraints_.numa_id; - my_max_concurrency = constraints_.max_concurrency; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - my_core_type = constraints_.core_type; - my_max_threads_per_core = constraints_.max_threads_per_core; -#endif - my_num_reserved_slots = reserved_for_masters; - my_priority = a_priority; - r1::initialize(*this); - mark_initialized(); - } - } -#endif /*__TBB_ARENA_BINDING*/ - - //! 
Attaches this instance to the current arena of the thread - void initialize(attach) { - // TODO: decide if this call must be thread-safe - __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); - if( !is_active() ) { - if ( !r1::attach(*this) ) { - r1::initialize(*this); - } - mark_initialized(); - } - } - - //! Removes the reference to the internal arena representation. - //! Not thread safe wrt concurrent invocations of other methods. - void terminate() { - if( is_active() ) { - r1::terminate(*this); - my_initialization_state.store(do_once_state::uninitialized, std::memory_order_relaxed); - } - } - - //! Removes the reference to the internal arena representation, and destroys the external object. - //! Not thread safe wrt concurrent invocations of other methods. - ~task_arena() { - terminate(); - } - - //! Returns true if the arena is active (initialized); false otherwise. - //! The name was chosen to match a task_scheduler_init method with the same semantics. - bool is_active() const { - return my_initialization_state.load(std::memory_order_acquire) == do_once_state::initialized; - } - - //! Enqueues a task into the arena to process a functor, and immediately returns. - //! Does not require the calling thread to join the arena - - template<typename F> - void enqueue(F&& f) { - enqueue_impl(std::forward<F>(f)); - } - - //! Joins the arena and executes a mutable functor, then returns - //! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion - //! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread - //! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). - template<typename F> - auto execute(F&& f) -> decltype(f()) { - return execute_impl<decltype(f())>(f); - } - -#if __TBB_EXTRA_DEBUG - //! Returns my_num_reserved_slots - int debug_reserved_slots() const { - // Handle special cases inside the library - return my_num_reserved_slots; - } - - //! Returns my_max_concurrency - int debug_max_concurrency() const { - // Handle special cases inside the library - return my_max_concurrency; - } - - //! Wait for all work in the arena to be completed - //! Even submitted by other application threads - //! Joins arena if/when possible (in the same way as execute()) - void debug_wait_until_empty() { - initialize(); - r1::wait(*this); - } -#endif //__TBB_EXTRA_DEBUG - - //! Returns the maximal number of threads that can work inside the arena - int max_concurrency() const { - // Handle special cases inside the library - return (my_max_concurrency > 1) ? my_max_concurrency : r1::max_concurrency(this); - } - - friend void submit(task& t, task_arena& ta, task_group_context& ctx, bool as_critical) { - __TBB_ASSERT(ta.is_active(), nullptr); - call_itt_task_notify(releasing, &t); - r1::submit(t, ctx, ta.my_arena.load(std::memory_order_relaxed), as_critical ? 1 : 0); - } -}; - -//! Executes a mutable functor in isolation within the current task arena. -//! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). -template<typename F> -inline auto isolate(F&& f) -> decltype(f()) { - return isolate_impl<decltype(f())>(f); -} - -//! Returns the index, aka slot number, of the calling thread in its current arena -inline int current_thread_index() { - int idx = r1::execution_slot(nullptr); - return idx == -1 ? task_arena_base::not_initialized : idx; -} - -//! 
Returns the maximal number of threads that can work inside the arena -inline int max_concurrency() { - return r1::max_concurrency(nullptr); -} - -using r1::submit; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::task_arena; - -namespace this_task_arena { -using detail::d1::current_thread_index; -using detail::d1::max_concurrency; -using detail::d1::isolate; -} // namespace this_task_arena - -} // inline namespace v1 - -} // namespace tbb -#endif /* __TBB_task_arena_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_task_arena_H +#define __TBB_task_arena_H + +#include "detail/_namespace_injection.h" +#include "detail/_task.h" +#include "detail/_exception.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#if __TBB_ARENA_BINDING +#include "info.h" +#endif /*__TBB_ARENA_BINDING*/ + +namespace tbb { +namespace detail { + +namespace d1 { + +template<typename F, typename R> +class task_arena_function : public delegate_base { + F &my_func; + aligned_space<R> my_return_storage; + bool my_constructed{false}; + // The function should be called only once. + bool operator()() const override { + new (my_return_storage.begin()) R(my_func()); + return true; + } +public: + task_arena_function(F& f) : my_func(f) {} + // The function can be called only after operator() and only once. 
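A sketch of how the task_arena interface shown in this header is commonly used; the concurrency limit and the vector payload are illustrative assumptions, not part of the diff.

    #include <cstddef>
    #include <vector>
    #include "oneapi/tbb/task_arena.h"
    #include "oneapi/tbb/parallel_for.h"

    void scale(std::vector<double>& v) {
        // At most 4 slots, one of them reserved for the calling (external) thread.
        tbb::task_arena arena(/*max_concurrency=*/4, /*reserved_for_masters=*/1);

        // execute() joins the arena and returns when the functor completes.
        arena.execute([&] {
            tbb::parallel_for(std::size_t(0), v.size(), [&](std::size_t i) {
                v[i] *= 2.0;
            });
        });

        // enqueue() submits fire-and-forget work and returns immediately;
        // completion has to be tracked separately (e.g. via a task_group).
        arena.enqueue([] { /* background work */ });
    }

Real arena construction is deferred, as the comments in the header note: the constructor only records settings, and the first execute(), enqueue(), or initialize() call allocates the underlying arena.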
+ R consume_result() { + my_constructed = true; + return std::move(*(my_return_storage.begin())); + } + ~task_arena_function() override { + if (my_constructed) { + my_return_storage.begin()->~R(); + } + } +}; + +template<typename F> +class task_arena_function<F,void> : public delegate_base { + F &my_func; + bool operator()() const override { + my_func(); + return true; + } +public: + task_arena_function(F& f) : my_func(f) {} + void consume_result() const {} + + friend class task_arena_base; +}; + +class task_arena_base; +class task_scheduler_observer; +} // namespace d1 + +namespace r1 { +class arena; +struct task_arena_impl; + +void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool); +void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base&); +void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base&); +bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base&); +void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); +void __TBB_EXPORTED_FUNC wait(d1::task_arena_base&); +int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base*); +void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base& d, std::intptr_t); + +void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_arena_base*); +void __TBB_EXPORTED_FUNC submit(d1::task&, d1::task_group_context&, arena*, std::uintptr_t); +} // namespace r1 + +namespace d1 { + +static constexpr int priority_stride = INT_MAX / 4; + +class task_arena_base { + friend struct r1::task_arena_impl; + friend void r1::observe(d1::task_scheduler_observer&, bool); +public: + enum class priority : int { + low = 1 * priority_stride, + normal = 2 * priority_stride, + high = 3 * priority_stride + }; +#if __TBB_ARENA_BINDING + using constraints = tbb::detail::d1::constraints; +#endif /*__TBB_ARENA_BINDING*/ +protected: + //! Special settings + intptr_t my_version_and_traits; + + std::atomic<do_once_state> my_initialization_state; + + //! NULL if not currently initialized. + std::atomic<r1::arena*> my_arena; + static_assert(sizeof(std::atomic<r1::arena*>) == sizeof(r1::arena*), + "To preserve backward compatibility we need the equal size of an atomic pointer and a pointer"); + + //! Concurrency level for deferred initialization + int my_max_concurrency; + + //! Reserved slots for external threads + unsigned my_num_reserved_slots; + + //! Arena priority + priority my_priority; + + //! The NUMA node index to which the arena will be attached + numa_node_id my_numa_id; + + //! The core type index to which arena will be attached + core_type_id my_core_type; + + //! Number of threads per core + int my_max_threads_per_core; + + // Backward compatibility checks. + core_type_id core_type() const { + return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? my_core_type : automatic; + } + int max_threads_per_core() const { + return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? 
my_max_threads_per_core : automatic; + } + + enum { + default_flags = 0 + , core_type_support_flag = 1 + }; + + task_arena_base(int max_concurrency, unsigned reserved_for_masters, priority a_priority) + : my_version_and_traits(default_flags | core_type_support_flag) + , my_initialization_state(do_once_state::uninitialized) + , my_arena(nullptr) + , my_max_concurrency(max_concurrency) + , my_num_reserved_slots(reserved_for_masters) + , my_priority(a_priority) + , my_numa_id(automatic) + , my_core_type(automatic) + , my_max_threads_per_core(automatic) + {} + +#if __TBB_ARENA_BINDING + task_arena_base(const constraints& constraints_, unsigned reserved_for_masters, priority a_priority) + : my_version_and_traits(default_flags | core_type_support_flag) + , my_initialization_state(do_once_state::uninitialized) + , my_arena(nullptr) + , my_max_concurrency(constraints_.max_concurrency) + , my_num_reserved_slots(reserved_for_masters) + , my_priority(a_priority) + , my_numa_id(constraints_.numa_id) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + , my_core_type(constraints_.core_type) + , my_max_threads_per_core(constraints_.max_threads_per_core) +#else + , my_core_type(automatic) + , my_max_threads_per_core(automatic) +#endif + {} +#endif /*__TBB_ARENA_BINDING*/ +public: + //! Typedef for number of threads that is automatic. + static const int automatic = -1; + static const int not_initialized = -2; +}; + +template<typename R, typename F> +R isolate_impl(F& f) { + task_arena_function<F, R> func(f); + r1::isolate_within_arena(func, /*isolation*/ 0); + return func.consume_result(); +} + +/** 1-to-1 proxy representation class of scheduler's arena + * Constructors set up settings only, real construction is deferred till the first method invocation + * Destructor only removes one of the references to the inner arena representation. + * Final destruction happens when all the references (and the work) are gone. + */ +class task_arena : public task_arena_base { + + template <typename F> + class enqueue_task : public task { + small_object_allocator m_allocator; + const F m_func; + + void finalize(const execution_data& ed) { + m_allocator.delete_object(this, ed); + } + task* execute(execution_data& ed) override { + m_func(); + finalize(ed); + return nullptr; + } + task* cancel(execution_data&) override { + __TBB_ASSERT_RELEASE(false, "Unhandled exception from enqueue task is caught"); + return nullptr; + } + public: + enqueue_task(const F& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(f) {} + enqueue_task(F&& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(std::move(f)) {} + }; + + void mark_initialized() { + __TBB_ASSERT( my_arena.load(std::memory_order_relaxed), "task_arena initialization is incomplete" ); + my_initialization_state.store(do_once_state::initialized, std::memory_order_release); + } + + template<typename F> + void enqueue_impl(F&& f) { + initialize(); + small_object_allocator alloc{}; + r1::enqueue(*alloc.new_object<enqueue_task<typename std::decay<F>::type>>(std::forward<F>(f), alloc), this); + } + + template<typename R, typename F> + R execute_impl(F& f) { + initialize(); + task_arena_function<F, R> func(f); + r1::execute(*this, func); + return func.consume_result(); + } +public: + //! 
Creates task_arena with certain concurrency limits + /** Sets up settings only, real construction is deferred till the first method invocation + * @arg max_concurrency specifies total number of slots in arena where threads work + * @arg reserved_for_masters specifies number of slots to be used by external threads only. + * Value of 1 is default and reflects behavior of implicit arenas. + **/ + task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + : task_arena_base(max_concurrency_, reserved_for_masters, a_priority) + {} + +#if __TBB_ARENA_BINDING + //! Creates task arena pinned to certain NUMA node + task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + : task_arena_base(constraints_, reserved_for_masters, a_priority) + {} + + //! Copies settings from another task_arena + task_arena(const task_arena &s) // copy settings but not the reference or instance + : task_arena_base( + constraints{} + .set_numa_id(s.my_numa_id) + .set_max_concurrency(s.my_max_concurrency) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + .set_core_type(s.my_core_type) + .set_max_threads_per_core(s.my_max_threads_per_core) +#endif + , s.my_num_reserved_slots, s.my_priority) + {} +#else + //! Copies settings from another task_arena + task_arena(const task_arena& a) // copy settings but not the reference or instance + : task_arena_base(a.my_max_concurrency, a.my_num_reserved_slots, a.my_priority) + {} +#endif /*__TBB_ARENA_BINDING*/ + + //! Tag class used to indicate the "attaching" constructor + struct attach {}; + + //! Creates an instance of task_arena attached to the current arena of the thread + explicit task_arena( attach ) + : task_arena_base(automatic, 1, priority::normal) // use default settings if attach fails + { + if (r1::attach(*this)) { + mark_initialized(); + } + } + + //! Forces allocation of the resources for the task_arena as specified in constructor arguments + void initialize() { + atomic_do_once([this]{ r1::initialize(*this); }, my_initialization_state); + } + + //! Overrides concurrency level and forces initialization of internal representation + void initialize(int max_concurrency_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + { + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + my_max_concurrency = max_concurrency_; + my_num_reserved_slots = reserved_for_masters; + my_priority = a_priority; + r1::initialize(*this); + mark_initialized(); + } + } + +#if __TBB_ARENA_BINDING + void initialize(constraints constraints_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + { + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + my_numa_id = constraints_.numa_id; + my_max_concurrency = constraints_.max_concurrency; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + my_core_type = constraints_.core_type; + my_max_threads_per_core = constraints_.max_threads_per_core; +#endif + my_num_reserved_slots = reserved_for_masters; + my_priority = a_priority; + r1::initialize(*this); + mark_initialized(); + } + } +#endif /*__TBB_ARENA_BINDING*/ + + //! 
Attaches this instance to the current arena of the thread + void initialize(attach) { + // TODO: decide if this call must be thread-safe + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + if ( !r1::attach(*this) ) { + r1::initialize(*this); + } + mark_initialized(); + } + } + + //! Removes the reference to the internal arena representation. + //! Not thread safe wrt concurrent invocations of other methods. + void terminate() { + if( is_active() ) { + r1::terminate(*this); + my_initialization_state.store(do_once_state::uninitialized, std::memory_order_relaxed); + } + } + + //! Removes the reference to the internal arena representation, and destroys the external object. + //! Not thread safe wrt concurrent invocations of other methods. + ~task_arena() { + terminate(); + } + + //! Returns true if the arena is active (initialized); false otherwise. + //! The name was chosen to match a task_scheduler_init method with the same semantics. + bool is_active() const { + return my_initialization_state.load(std::memory_order_acquire) == do_once_state::initialized; + } + + //! Enqueues a task into the arena to process a functor, and immediately returns. + //! Does not require the calling thread to join the arena + + template<typename F> + void enqueue(F&& f) { + enqueue_impl(std::forward<F>(f)); + } + + //! Joins the arena and executes a mutable functor, then returns + //! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion + //! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread + //! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). + template<typename F> + auto execute(F&& f) -> decltype(f()) { + return execute_impl<decltype(f())>(f); + } + +#if __TBB_EXTRA_DEBUG + //! Returns my_num_reserved_slots + int debug_reserved_slots() const { + // Handle special cases inside the library + return my_num_reserved_slots; + } + + //! Returns my_max_concurrency + int debug_max_concurrency() const { + // Handle special cases inside the library + return my_max_concurrency; + } + + //! Wait for all work in the arena to be completed + //! Even submitted by other application threads + //! Joins arena if/when possible (in the same way as execute()) + void debug_wait_until_empty() { + initialize(); + r1::wait(*this); + } +#endif //__TBB_EXTRA_DEBUG + + //! Returns the maximal number of threads that can work inside the arena + int max_concurrency() const { + // Handle special cases inside the library + return (my_max_concurrency > 1) ? my_max_concurrency : r1::max_concurrency(this); + } + + friend void submit(task& t, task_arena& ta, task_group_context& ctx, bool as_critical) { + __TBB_ASSERT(ta.is_active(), nullptr); + call_itt_task_notify(releasing, &t); + r1::submit(t, ctx, ta.my_arena.load(std::memory_order_relaxed), as_critical ? 1 : 0); + } +}; + +//! Executes a mutable functor in isolation within the current task arena. +//! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). +template<typename F> +inline auto isolate(F&& f) -> decltype(f()) { + return isolate_impl<decltype(f())>(f); +} + +//! Returns the index, aka slot number, of the calling thread in its current arena +inline int current_thread_index() { + int idx = r1::execution_slot(nullptr); + return idx == -1 ? task_arena_base::not_initialized : idx; +} + +//! 
Returns the maximal number of threads that can work inside the arena +inline int max_concurrency() { + return r1::max_concurrency(nullptr); +} + +using r1::submit; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::task_arena; + +namespace this_task_arena { +using detail::d1::current_thread_index; +using detail::d1::max_concurrency; +using detail::d1::isolate; +} // namespace this_task_arena + +} // inline namespace v1 + +} // namespace tbb +#endif /* __TBB_task_arena_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_group.h b/contrib/libs/tbb/include/oneapi/tbb/task_group.h index e82553076a..0aa7c46a8c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_group.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_group.h @@ -1,556 +1,556 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_task_group_H -#define __TBB_task_group_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_template_helpers.h" -#include "detail/_utils.h" -#include "detail/_exception.h" -#include "detail/_task.h" -#include "detail/_small_object_pool.h" - -#include "profiling.h" - -#include <functional> - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Suppress warning: structure was padded due to alignment specifier - #pragma warning(push) - #pragma warning(disable:4324) -#endif - -namespace tbb { -namespace detail { - -namespace d1 { -class delegate_base; -class task_arena_base; -class task_group_context; -} - -namespace r1 { -// Forward declarations -class tbb_exception_ptr; -class market; -class thread_data; -class task_dispatcher; -template <bool> -class context_guard_helper; -struct task_arena_impl; - -void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); -void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base&, std::intptr_t); - -void __TBB_EXPORTED_FUNC initialize(d1::task_group_context&); -void __TBB_EXPORTED_FUNC destroy(d1::task_group_context&); -void __TBB_EXPORTED_FUNC reset(d1::task_group_context&); -bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context&); -bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context&); -void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context&); - -struct task_group_context_impl; -} - -namespace d1 { - -struct context_list_node { - std::atomic<context_list_node*> prev{}; - std::atomic<context_list_node*> next{}; - - void remove_relaxed() { - context_list_node* p = prev.load(std::memory_order_relaxed); - context_list_node* n = next.load(std::memory_order_relaxed); - p->next.store(n, std::memory_order_relaxed); - n->prev.store(p, std::memory_order_relaxed); - } -}; - -//! Used to form groups of tasks -/** @ingroup task_scheduling - The context services explicit cancellation requests from user code, and unhandled - exceptions intercepted during tasks execution. 
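The free functions re-exported through tbb::this_task_arena at the end of the task_arena.h diff can be exercised roughly as follows; the nested loop is only a placeholder workload.

    #include <cstdio>
    #include "oneapi/tbb/task_arena.h"
    #include "oneapi/tbb/parallel_for.h"

    void report_and_isolate() {
        // Slot index of the calling thread in its current arena
        // (task_arena::not_initialized when called outside any arena).
        std::printf("slot %d of %d\n",
                    tbb::this_task_arena::current_thread_index(),
                    tbb::this_task_arena::max_concurrency());

        // isolate() prevents the calling thread from picking up unrelated
        // tasks while it blocks in the nested parallel loop.
        tbb::this_task_arena::isolate([] {
            tbb::parallel_for(0, 100, [](int) { /* nested work */ });
        });
    }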
Intercepting an exception results - in generating internal cancellation requests (which is processed in exactly the - same way as external ones). - - The context is associated with one or more root tasks and defines the cancellation - group that includes all the descendants of the corresponding root task(s). Association - is established when a context object is passed as an argument to the task::allocate_root() - method. See task_group_context::task_group_context for more details. - - The context can be bound to another one, and other contexts can be bound to it, - forming a tree-like structure: parent -> this -> children. Arrows here designate - cancellation propagation direction. If a task in a cancellation group is cancelled - all the other tasks in this group and groups bound to it (as children) get cancelled too. -**/ -class task_group_context : no_copy { -public: - enum traits_type { - fp_settings = 1 << 1, - concurrent_wait = 1 << 2, - default_traits = 0 - }; - enum kind_type { - isolated, - bound - }; -private: - //! Space for platform-specific FPU settings. - /** Must only be accessed inside TBB binaries, and never directly in user - code or inline methods. */ - std::uint64_t my_cpu_ctl_env; - - //! Specifies whether cancellation was requested for this task group. - std::atomic<std::uint32_t> my_cancellation_requested; - - //! Version for run-time checks and behavioral traits of the context. - std::uint8_t my_version; - - //! The context traits. - struct context_traits { - bool fp_settings : 1; - bool concurrent_wait : 1; - bool bound : 1; - } my_traits; - - static_assert(sizeof(context_traits) == 1, "Traits shall fit into one byte."); - - static constexpr std::uint8_t may_have_children = 1; - //! The context internal state (currently only may_have_children). - std::atomic<std::uint8_t> my_state; - - enum class lifetime_state : std::uint8_t { - created, - locked, - isolated, - bound, - detached, - dying - }; - - //! The synchronization machine state to manage lifetime. - std::atomic<lifetime_state> my_lifetime_state; - - //! Pointer to the context of the parent cancellation group. NULL for isolated contexts. - task_group_context* my_parent; - - //! Thread data instance that registered this context in its list. - std::atomic<r1::thread_data*> my_owner; - - //! Used to form the thread specific list of contexts without additional memory allocation. - /** A context is included into the list of the current thread when its binding to - its parent happens. Any context can be present in the list of one thread only. **/ - context_list_node my_node; - - //! Pointer to the container storing exception being propagated across this task group. - r1::tbb_exception_ptr* my_exception; - - //! Used to set and maintain stack stitching point for Intel Performance Tools. - void* my_itt_caller; - - //! Description of algorithm for scheduler based instrumentation. 
- string_resource_index my_name; - - char padding[max_nfs_size - - sizeof(std::uint64_t) // my_cpu_ctl_env - - sizeof(std::atomic<std::uint32_t>) // my_cancellation_requested - - sizeof(std::uint8_t) // my_version - - sizeof(context_traits) // my_traits - - sizeof(std::atomic<std::uint8_t>) // my_state - - sizeof(std::atomic<lifetime_state>) // my_lifetime_state - - sizeof(task_group_context*) // my_parent - - sizeof(std::atomic<r1::thread_data*>) // my_owner - - sizeof(context_list_node) // my_node - - sizeof(r1::tbb_exception_ptr*) // my_exception - - sizeof(void*) // my_itt_caller - - sizeof(string_resource_index) // my_name - ]; - - task_group_context(context_traits t, string_resource_index name) - : my_version{}, my_name{ name } { - my_traits = t; // GCC4.8 issues warning list initialization for bitset (missing-field-initializers) - r1::initialize(*this); - } - - static context_traits make_traits(kind_type relation_with_parent, std::uintptr_t user_traits) { - context_traits ct; - ct.bound = relation_with_parent == bound; - ct.fp_settings = (user_traits & fp_settings) == fp_settings; - ct.concurrent_wait = (user_traits & concurrent_wait) == concurrent_wait; - return ct; - } - -public: - //! Default & binding constructor. - /** By default a bound context is created. That is this context will be bound - (as child) to the context of the currently executing task . Cancellation - requests passed to the parent context are propagated to all the contexts - bound to it. Similarly priority change is propagated from the parent context - to its children. - - If task_group_context::isolated is used as the argument, then the tasks associated - with this context will never be affected by events in any other context. - - Creating isolated contexts involve much less overhead, but they have limited - utility. Normally when an exception occurs in an algorithm that has nested - ones running, it is desirably to have all the nested algorithms cancelled - as well. Such a behavior requires nested algorithms to use bound contexts. - - There is one good place where using isolated algorithms is beneficial. It is - an external thread. That is if a particular algorithm is invoked directly from - the external thread (not from a TBB task), supplying it with explicitly - created isolated context will result in a faster algorithm startup. - - VERSIONING NOTE: - Implementation(s) of task_group_context constructor(s) cannot be made - entirely out-of-line because the run-time version must be set by the user - code. This will become critically important for binary compatibility, if - we ever have to change the size of the context object. **/ - - task_group_context(kind_type relation_with_parent = bound, - std::uintptr_t t = default_traits) - : task_group_context(make_traits(relation_with_parent, t), CUSTOM_CTX) {} - - // Custom constructor for instrumentation of oneTBB algorithm - task_group_context (string_resource_index name ) - : task_group_context(make_traits(bound, default_traits), name) {} - - // Do not introduce any logic on user side since it might break state propagation assumptions - ~task_group_context() { - r1::destroy(*this); - } - - //! Forcefully reinitializes the context after the task tree it was associated with is completed. - /** Because the method assumes that all the tasks that used to be associated with - this context have already finished, calling it while the context is still - in use somewhere in the task hierarchy leads to undefined behavior. - - IMPORTANT: This method is not thread safe! 
- - The method does not change the context's parent if it is set. **/ - void reset() { - r1::reset(*this); - } - - //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups. - /** \return false if cancellation has already been requested, true otherwise. - - Note that canceling never fails. When false is returned, it just means that - another thread (or this one) has already sent cancellation request to this - context or to one of its ancestors (if this context is bound). It is guaranteed - that when this method is concurrently called on the same not yet cancelled - context, true will be returned by one and only one invocation. **/ - bool cancel_group_execution() { - return r1::cancel_group_execution(*this); - } - - //! Returns true if the context received cancellation request. - bool is_group_execution_cancelled() { - return r1::is_group_execution_cancelled(*this); - } - -#if __TBB_FP_CONTEXT - //! Captures the current FPU control settings to the context. - /** Because the method assumes that all the tasks that used to be associated with - this context have already finished, calling it while the context is still - in use somewhere in the task hierarchy leads to undefined behavior. - - IMPORTANT: This method is not thread safe! - - The method does not change the FPU control settings of the context's parent. **/ - void capture_fp_settings() { - r1::capture_fp_settings(*this); - } -#endif - - //! Returns the user visible context trait - std::uintptr_t traits() const { - std::uintptr_t t{}; - t |= my_traits.fp_settings ? fp_settings : 0; - t |= my_traits.concurrent_wait ? concurrent_wait : 0; - return t; - } -private: - //// TODO: cleanup friends - friend class r1::market; - friend class r1::thread_data; - friend class r1::task_dispatcher; - template <bool> - friend class r1::context_guard_helper; - friend struct r1::task_arena_impl; - friend struct r1::task_group_context_impl; -}; // class task_group_context - -static_assert(sizeof(task_group_context) == 128, "Wrong size of task_group_context"); - -enum task_group_status { - not_complete, - complete, - canceled -}; - -class task_group; -class structured_task_group; -#if TBB_PREVIEW_ISOLATED_TASK_GROUP -class isolated_task_group; -#endif - -template<typename F> -class function_task : public task { - const F m_func; - wait_context& m_wait_ctx; - small_object_allocator m_allocator; - - void finalize(const execution_data& ed) { - // Make a local reference not to access this after destruction. - wait_context& wo = m_wait_ctx; - // Copy allocator to the stack - auto allocator = m_allocator; - // Destroy user functor before release wait. 
- this->~function_task(); - wo.release(); - - allocator.deallocate(this, ed); - } - task* execute(execution_data& ed) override { - m_func(); - finalize(ed); - return nullptr; - } - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } -public: - function_task(const F& f, wait_context& wo, small_object_allocator& alloc) - : m_func(f) - , m_wait_ctx(wo) - , m_allocator(alloc) {} - - function_task(F&& f, wait_context& wo, small_object_allocator& alloc) - : m_func(std::move(f)) - , m_wait_ctx(wo) - , m_allocator(alloc) {} -}; - -template <typename F> -class function_stack_task : public task { - const F& m_func; - wait_context& m_wait_ctx; - - void finalize() { - m_wait_ctx.release(); - } - task* execute(execution_data&) override { - m_func(); - finalize(); - return nullptr; - } - task* cancel(execution_data&) override { - finalize(); - return nullptr; - } -public: - function_stack_task(const F& f, wait_context& wo) : m_func(f), m_wait_ctx(wo) {} -}; - -class task_group_base : no_copy { -protected: - wait_context m_wait_ctx; - task_group_context m_context; - - template<typename F> - task_group_status internal_run_and_wait(const F& f) { - function_stack_task<F> t{ f, m_wait_ctx }; - m_wait_ctx.reserve(); - bool cancellation_status = false; - try_call([&] { - execute_and_wait(t, m_context, m_wait_ctx, m_context); - }).on_completion([&] { - // TODO: the reset method is not thread-safe. Ensure the correct behavior. - cancellation_status = m_context.is_group_execution_cancelled(); - m_context.reset(); - }); - return cancellation_status ? canceled : complete; - } - - template<typename F> - task* prepare_task(F&& f) { - m_wait_ctx.reserve(); - small_object_allocator alloc{}; - return alloc.new_object<function_task<typename std::decay<F>::type>>(std::forward<F>(f), m_wait_ctx, alloc); - } - -public: - task_group_base(uintptr_t traits = 0) - : m_wait_ctx(0) - , m_context(task_group_context::bound, task_group_context::default_traits | traits) - { - } - - ~task_group_base() noexcept(false) { - if (m_wait_ctx.continue_execution()) { -#if __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT - bool stack_unwinding_in_progress = std::uncaught_exceptions() > 0; -#else - bool stack_unwinding_in_progress = std::uncaught_exception(); -#endif - // Always attempt to do proper cleanup to avoid inevitable memory corruption - // in case of missing wait (for the sake of better testability & debuggability) - if (!m_context.is_group_execution_cancelled()) - cancel(); - d1::wait(m_wait_ctx, m_context); - if (!stack_unwinding_in_progress) - throw_exception(exception_id::missing_wait); - } - } - - task_group_status wait() { - bool cancellation_status = false; - try_call([&] { - d1::wait(m_wait_ctx, m_context); - }).on_completion([&] { - // TODO: the reset method is not thread-safe. Ensure the correct behavior. - cancellation_status = m_context.is_group_execution_cancelled(); - m_context.reset(); - }); - return cancellation_status ? 
canceled : complete; - } - - void cancel() { - m_context.cancel_group_execution(); - } -}; // class task_group_base - -class task_group : public task_group_base { -public: - task_group() : task_group_base(task_group_context::concurrent_wait) {} - - template<typename F> - void run(F&& f) { - spawn(*prepare_task(std::forward<F>(f)), m_context); - } - - template<typename F> - task_group_status run_and_wait(const F& f) { - return internal_run_and_wait(f); - } -}; // class task_group - -#if TBB_PREVIEW_ISOLATED_TASK_GROUP -class spawn_delegate : public delegate_base { - task* task_to_spawn; - task_group_context& context; - bool operator()() const override { - spawn(*task_to_spawn, context); - return true; - } -public: - spawn_delegate(task* a_task, task_group_context& ctx) - : task_to_spawn(a_task), context(ctx) - {} -}; - -class wait_delegate : public delegate_base { - bool operator()() const override { - status = tg.wait(); - return true; - } -protected: - task_group& tg; - task_group_status& status; -public: - wait_delegate(task_group& a_group, task_group_status& tgs) - : tg(a_group), status(tgs) {} -}; - -template<typename F> -class run_wait_delegate : public wait_delegate { - F& func; - bool operator()() const override { - status = tg.run_and_wait(func); - return true; - } -public: - run_wait_delegate(task_group& a_group, F& a_func, task_group_status& tgs) - : wait_delegate(a_group, tgs), func(a_func) {} -}; - -class isolated_task_group : public task_group { - intptr_t this_isolation() { - return reinterpret_cast<intptr_t>(this); - } -public: - isolated_task_group () : task_group() {} - - template<typename F> - void run(F&& f) { - spawn_delegate sd(prepare_task(std::forward<F>(f)), m_context); - r1::isolate_within_arena(sd, this_isolation()); - } - - template<typename F> - task_group_status run_and_wait( const F& f ) { - task_group_status result = not_complete; - run_wait_delegate<const F> rwd(*this, f, result); - r1::isolate_within_arena(rwd, this_isolation()); - __TBB_ASSERT(result != not_complete, "premature exit from wait?"); - return result; - } - - task_group_status wait() { - task_group_status result = not_complete; - wait_delegate wd(*this, result); - r1::isolate_within_arena(wd, this_isolation()); - __TBB_ASSERT(result != not_complete, "premature exit from wait?"); - return result; - } -}; // class isolated_task_group -#endif // TBB_PREVIEW_ISOLATED_TASK_GROUP - -inline bool is_current_task_group_canceling() { - task_group_context* ctx = current_context(); - return ctx ? ctx->is_group_execution_cancelled() : false; -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::task_group_context; -using detail::d1::task_group; -#if TBB_PREVIEW_ISOLATED_TASK_GROUP -using detail::d1::isolated_task_group; -#endif - -using detail::d1::task_group_status; -using detail::d1::not_complete; -using detail::d1::complete; -using detail::d1::canceled; - -using detail::d1::is_current_task_group_canceling; -using detail::r1::missing_wait; -} - -} // namespace tbb - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning(pop) // 4324 warning -#endif - -#endif // __TBB_task_group_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_task_group_H +#define __TBB_task_group_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_template_helpers.h" +#include "detail/_utils.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" + +#include "profiling.h" + +#include <functional> + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning(push) + #pragma warning(disable:4324) +#endif + +namespace tbb { +namespace detail { + +namespace d1 { +class delegate_base; +class task_arena_base; +class task_group_context; +} + +namespace r1 { +// Forward declarations +class tbb_exception_ptr; +class market; +class thread_data; +class task_dispatcher; +template <bool> +class context_guard_helper; +struct task_arena_impl; + +void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); +void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base&, std::intptr_t); + +void __TBB_EXPORTED_FUNC initialize(d1::task_group_context&); +void __TBB_EXPORTED_FUNC destroy(d1::task_group_context&); +void __TBB_EXPORTED_FUNC reset(d1::task_group_context&); +bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context&); +bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context&); +void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context&); + +struct task_group_context_impl; +} + +namespace d1 { + +struct context_list_node { + std::atomic<context_list_node*> prev{}; + std::atomic<context_list_node*> next{}; + + void remove_relaxed() { + context_list_node* p = prev.load(std::memory_order_relaxed); + context_list_node* n = next.load(std::memory_order_relaxed); + p->next.store(n, std::memory_order_relaxed); + n->prev.store(p, std::memory_order_relaxed); + } +}; + +//! Used to form groups of tasks +/** @ingroup task_scheduling + The context services explicit cancellation requests from user code, and unhandled + exceptions intercepted during tasks execution. Intercepting an exception results + in generating internal cancellation requests (which is processed in exactly the + same way as external ones). + + The context is associated with one or more root tasks and defines the cancellation + group that includes all the descendants of the corresponding root task(s). Association + is established when a context object is passed as an argument to the task::allocate_root() + method. See task_group_context::task_group_context for more details. + + The context can be bound to another one, and other contexts can be bound to it, + forming a tree-like structure: parent -> this -> children. Arrows here designate + cancellation propagation direction. If a task in a cancellation group is cancelled + all the other tasks in this group and groups bound to it (as children) get cancelled too. +**/ +class task_group_context : no_copy { +public: + enum traits_type { + fp_settings = 1 << 1, + concurrent_wait = 1 << 2, + default_traits = 0 + }; + enum kind_type { + isolated, + bound + }; +private: + //! Space for platform-specific FPU settings. 
+ /** Must only be accessed inside TBB binaries, and never directly in user + code or inline methods. */ + std::uint64_t my_cpu_ctl_env; + + //! Specifies whether cancellation was requested for this task group. + std::atomic<std::uint32_t> my_cancellation_requested; + + //! Version for run-time checks and behavioral traits of the context. + std::uint8_t my_version; + + //! The context traits. + struct context_traits { + bool fp_settings : 1; + bool concurrent_wait : 1; + bool bound : 1; + } my_traits; + + static_assert(sizeof(context_traits) == 1, "Traits shall fit into one byte."); + + static constexpr std::uint8_t may_have_children = 1; + //! The context internal state (currently only may_have_children). + std::atomic<std::uint8_t> my_state; + + enum class lifetime_state : std::uint8_t { + created, + locked, + isolated, + bound, + detached, + dying + }; + + //! The synchronization machine state to manage lifetime. + std::atomic<lifetime_state> my_lifetime_state; + + //! Pointer to the context of the parent cancellation group. NULL for isolated contexts. + task_group_context* my_parent; + + //! Thread data instance that registered this context in its list. + std::atomic<r1::thread_data*> my_owner; + + //! Used to form the thread specific list of contexts without additional memory allocation. + /** A context is included into the list of the current thread when its binding to + its parent happens. Any context can be present in the list of one thread only. **/ + context_list_node my_node; + + //! Pointer to the container storing exception being propagated across this task group. + r1::tbb_exception_ptr* my_exception; + + //! Used to set and maintain stack stitching point for Intel Performance Tools. + void* my_itt_caller; + + //! Description of algorithm for scheduler based instrumentation. + string_resource_index my_name; + + char padding[max_nfs_size + - sizeof(std::uint64_t) // my_cpu_ctl_env + - sizeof(std::atomic<std::uint32_t>) // my_cancellation_requested + - sizeof(std::uint8_t) // my_version + - sizeof(context_traits) // my_traits + - sizeof(std::atomic<std::uint8_t>) // my_state + - sizeof(std::atomic<lifetime_state>) // my_lifetime_state + - sizeof(task_group_context*) // my_parent + - sizeof(std::atomic<r1::thread_data*>) // my_owner + - sizeof(context_list_node) // my_node + - sizeof(r1::tbb_exception_ptr*) // my_exception + - sizeof(void*) // my_itt_caller + - sizeof(string_resource_index) // my_name + ]; + + task_group_context(context_traits t, string_resource_index name) + : my_version{}, my_name{ name } { + my_traits = t; // GCC4.8 issues warning list initialization for bitset (missing-field-initializers) + r1::initialize(*this); + } + + static context_traits make_traits(kind_type relation_with_parent, std::uintptr_t user_traits) { + context_traits ct; + ct.bound = relation_with_parent == bound; + ct.fp_settings = (user_traits & fp_settings) == fp_settings; + ct.concurrent_wait = (user_traits & concurrent_wait) == concurrent_wait; + return ct; + } + +public: + //! Default & binding constructor. + /** By default a bound context is created. That is this context will be bound + (as child) to the context of the currently executing task . Cancellation + requests passed to the parent context are propagated to all the contexts + bound to it. Similarly priority change is propagated from the parent context + to its children. + + If task_group_context::isolated is used as the argument, then the tasks associated + with this context will never be affected by events in any other context. 
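For orientation while reading this hunk: the bound/isolated distinction documented in task_group_context is easiest to see from the caller's side. The sketch below is illustrative only; it assumes the parallel_for overload that accepts an explicit task_group_context (from oneapi/tbb/parallel_for.h), and the function name and buffer are invented for the example.

    #include <cstddef>
    #include <oneapi/tbb/blocked_range.h>
    #include <oneapi/tbb/parallel_for.h>
    #include <oneapi/tbb/task_group.h>   // task_group_context

    // Hypothetical entry point called directly from an external (non-TBB) thread.
    // An isolated context is not bound to a parent group, so cancellation of
    // unrelated TBB work never propagates into this loop, and startup is cheaper.
    void scale_buffer(float* data, std::size_t n) {
        tbb::task_group_context ctx(tbb::task_group_context::isolated);
        tbb::parallel_for(
            tbb::blocked_range<std::size_t>(0, n),
            [&](const tbb::blocked_range<std::size_t>& r) {
                for (std::size_t i = r.begin(); i != r.end(); ++i)
                    data[i] *= 2.0f;
            },
            ctx);
        // ctx.cancel_group_execution() from another thread would cancel only this loop.
    }

A bound context (the default) would instead inherit cancellation requests from the group of the task that created it.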
+ + Creating isolated contexts involve much less overhead, but they have limited + utility. Normally when an exception occurs in an algorithm that has nested + ones running, it is desirably to have all the nested algorithms cancelled + as well. Such a behavior requires nested algorithms to use bound contexts. + + There is one good place where using isolated algorithms is beneficial. It is + an external thread. That is if a particular algorithm is invoked directly from + the external thread (not from a TBB task), supplying it with explicitly + created isolated context will result in a faster algorithm startup. + + VERSIONING NOTE: + Implementation(s) of task_group_context constructor(s) cannot be made + entirely out-of-line because the run-time version must be set by the user + code. This will become critically important for binary compatibility, if + we ever have to change the size of the context object. **/ + + task_group_context(kind_type relation_with_parent = bound, + std::uintptr_t t = default_traits) + : task_group_context(make_traits(relation_with_parent, t), CUSTOM_CTX) {} + + // Custom constructor for instrumentation of oneTBB algorithm + task_group_context (string_resource_index name ) + : task_group_context(make_traits(bound, default_traits), name) {} + + // Do not introduce any logic on user side since it might break state propagation assumptions + ~task_group_context() { + r1::destroy(*this); + } + + //! Forcefully reinitializes the context after the task tree it was associated with is completed. + /** Because the method assumes that all the tasks that used to be associated with + this context have already finished, calling it while the context is still + in use somewhere in the task hierarchy leads to undefined behavior. + + IMPORTANT: This method is not thread safe! + + The method does not change the context's parent if it is set. **/ + void reset() { + r1::reset(*this); + } + + //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups. + /** \return false if cancellation has already been requested, true otherwise. + + Note that canceling never fails. When false is returned, it just means that + another thread (or this one) has already sent cancellation request to this + context or to one of its ancestors (if this context is bound). It is guaranteed + that when this method is concurrently called on the same not yet cancelled + context, true will be returned by one and only one invocation. **/ + bool cancel_group_execution() { + return r1::cancel_group_execution(*this); + } + + //! Returns true if the context received cancellation request. + bool is_group_execution_cancelled() { + return r1::is_group_execution_cancelled(*this); + } + +#if __TBB_FP_CONTEXT + //! Captures the current FPU control settings to the context. + /** Because the method assumes that all the tasks that used to be associated with + this context have already finished, calling it while the context is still + in use somewhere in the task hierarchy leads to undefined behavior. + + IMPORTANT: This method is not thread safe! + + The method does not change the FPU control settings of the context's parent. **/ + void capture_fp_settings() { + r1::capture_fp_settings(*this); + } +#endif + + //! Returns the user visible context trait + std::uintptr_t traits() const { + std::uintptr_t t{}; + t |= my_traits.fp_settings ? fp_settings : 0; + t |= my_traits.concurrent_wait ? 
concurrent_wait : 0; + return t; + } +private: + //// TODO: cleanup friends + friend class r1::market; + friend class r1::thread_data; + friend class r1::task_dispatcher; + template <bool> + friend class r1::context_guard_helper; + friend struct r1::task_arena_impl; + friend struct r1::task_group_context_impl; +}; // class task_group_context + +static_assert(sizeof(task_group_context) == 128, "Wrong size of task_group_context"); + +enum task_group_status { + not_complete, + complete, + canceled +}; + +class task_group; +class structured_task_group; +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +class isolated_task_group; +#endif + +template<typename F> +class function_task : public task { + const F m_func; + wait_context& m_wait_ctx; + small_object_allocator m_allocator; + + void finalize(const execution_data& ed) { + // Make a local reference not to access this after destruction. + wait_context& wo = m_wait_ctx; + // Copy allocator to the stack + auto allocator = m_allocator; + // Destroy user functor before release wait. + this->~function_task(); + wo.release(); + + allocator.deallocate(this, ed); + } + task* execute(execution_data& ed) override { + m_func(); + finalize(ed); + return nullptr; + } + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } +public: + function_task(const F& f, wait_context& wo, small_object_allocator& alloc) + : m_func(f) + , m_wait_ctx(wo) + , m_allocator(alloc) {} + + function_task(F&& f, wait_context& wo, small_object_allocator& alloc) + : m_func(std::move(f)) + , m_wait_ctx(wo) + , m_allocator(alloc) {} +}; + +template <typename F> +class function_stack_task : public task { + const F& m_func; + wait_context& m_wait_ctx; + + void finalize() { + m_wait_ctx.release(); + } + task* execute(execution_data&) override { + m_func(); + finalize(); + return nullptr; + } + task* cancel(execution_data&) override { + finalize(); + return nullptr; + } +public: + function_stack_task(const F& f, wait_context& wo) : m_func(f), m_wait_ctx(wo) {} +}; + +class task_group_base : no_copy { +protected: + wait_context m_wait_ctx; + task_group_context m_context; + + template<typename F> + task_group_status internal_run_and_wait(const F& f) { + function_stack_task<F> t{ f, m_wait_ctx }; + m_wait_ctx.reserve(); + bool cancellation_status = false; + try_call([&] { + execute_and_wait(t, m_context, m_wait_ctx, m_context); + }).on_completion([&] { + // TODO: the reset method is not thread-safe. Ensure the correct behavior. + cancellation_status = m_context.is_group_execution_cancelled(); + m_context.reset(); + }); + return cancellation_status ? 
canceled : complete; + } + + template<typename F> + task* prepare_task(F&& f) { + m_wait_ctx.reserve(); + small_object_allocator alloc{}; + return alloc.new_object<function_task<typename std::decay<F>::type>>(std::forward<F>(f), m_wait_ctx, alloc); + } + +public: + task_group_base(uintptr_t traits = 0) + : m_wait_ctx(0) + , m_context(task_group_context::bound, task_group_context::default_traits | traits) + { + } + + ~task_group_base() noexcept(false) { + if (m_wait_ctx.continue_execution()) { +#if __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT + bool stack_unwinding_in_progress = std::uncaught_exceptions() > 0; +#else + bool stack_unwinding_in_progress = std::uncaught_exception(); +#endif + // Always attempt to do proper cleanup to avoid inevitable memory corruption + // in case of missing wait (for the sake of better testability & debuggability) + if (!m_context.is_group_execution_cancelled()) + cancel(); + d1::wait(m_wait_ctx, m_context); + if (!stack_unwinding_in_progress) + throw_exception(exception_id::missing_wait); + } + } + + task_group_status wait() { + bool cancellation_status = false; + try_call([&] { + d1::wait(m_wait_ctx, m_context); + }).on_completion([&] { + // TODO: the reset method is not thread-safe. Ensure the correct behavior. + cancellation_status = m_context.is_group_execution_cancelled(); + m_context.reset(); + }); + return cancellation_status ? canceled : complete; + } + + void cancel() { + m_context.cancel_group_execution(); + } +}; // class task_group_base + +class task_group : public task_group_base { +public: + task_group() : task_group_base(task_group_context::concurrent_wait) {} + + template<typename F> + void run(F&& f) { + spawn(*prepare_task(std::forward<F>(f)), m_context); + } + + template<typename F> + task_group_status run_and_wait(const F& f) { + return internal_run_and_wait(f); + } +}; // class task_group + +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +class spawn_delegate : public delegate_base { + task* task_to_spawn; + task_group_context& context; + bool operator()() const override { + spawn(*task_to_spawn, context); + return true; + } +public: + spawn_delegate(task* a_task, task_group_context& ctx) + : task_to_spawn(a_task), context(ctx) + {} +}; + +class wait_delegate : public delegate_base { + bool operator()() const override { + status = tg.wait(); + return true; + } +protected: + task_group& tg; + task_group_status& status; +public: + wait_delegate(task_group& a_group, task_group_status& tgs) + : tg(a_group), status(tgs) {} +}; + +template<typename F> +class run_wait_delegate : public wait_delegate { + F& func; + bool operator()() const override { + status = tg.run_and_wait(func); + return true; + } +public: + run_wait_delegate(task_group& a_group, F& a_func, task_group_status& tgs) + : wait_delegate(a_group, tgs), func(a_func) {} +}; + +class isolated_task_group : public task_group { + intptr_t this_isolation() { + return reinterpret_cast<intptr_t>(this); + } +public: + isolated_task_group () : task_group() {} + + template<typename F> + void run(F&& f) { + spawn_delegate sd(prepare_task(std::forward<F>(f)), m_context); + r1::isolate_within_arena(sd, this_isolation()); + } + + template<typename F> + task_group_status run_and_wait( const F& f ) { + task_group_status result = not_complete; + run_wait_delegate<const F> rwd(*this, f, result); + r1::isolate_within_arena(rwd, this_isolation()); + __TBB_ASSERT(result != not_complete, "premature exit from wait?"); + return result; + } + + task_group_status wait() { + task_group_status result = not_complete; + 
wait_delegate wd(*this, result); + r1::isolate_within_arena(wd, this_isolation()); + __TBB_ASSERT(result != not_complete, "premature exit from wait?"); + return result; + } +}; // class isolated_task_group +#endif // TBB_PREVIEW_ISOLATED_TASK_GROUP + +inline bool is_current_task_group_canceling() { + task_group_context* ctx = current_context(); + return ctx ? ctx->is_group_execution_cancelled() : false; +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::task_group_context; +using detail::d1::task_group; +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +using detail::d1::isolated_task_group; +#endif + +using detail::d1::task_group_status; +using detail::d1::not_complete; +using detail::d1::complete; +using detail::d1::canceled; + +using detail::d1::is_current_task_group_canceling; +using detail::r1::missing_wait; +} + +} // namespace tbb + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning(pop) // 4324 warning +#endif + +#endif // __TBB_task_group_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h b/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h index 276ca70707..0f6f25f124 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h @@ -1,116 +1,116 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_task_scheduler_observer_H -#define __TBB_task_scheduler_observer_H - -#include "detail/_namespace_injection.h" -#include "task_arena.h" -#include <atomic> - -namespace tbb { -namespace detail { - -namespace d1 { -class task_scheduler_observer; -} - -namespace r1 { -class observer_proxy; -class observer_list; - -//! Enable or disable observation -/** For local observers the method can be used only when the current thread -has the task scheduler initialized or is attached to an arena. -Repeated calls with the same state are no-ops. **/ -void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool state = true); -} - -namespace d1 { -class task_scheduler_observer { - friend class r1::observer_proxy; - friend class r1::observer_list; - friend void r1::observe(d1::task_scheduler_observer&, bool); - - //! Pointer to the proxy holding this observer. - /** Observers are proxied by the scheduler to maintain persistent lists of them. **/ - std::atomic<r1::observer_proxy*> my_proxy{ nullptr }; - - //! Counter preventing the observer from being destroyed while in use by the scheduler. - /** Valid only when observation is on. **/ - std::atomic<intptr_t> my_busy_count{ 0 }; - - //! Contains task_arena pointer - task_arena* my_task_arena{ nullptr }; -public: - //! Returns true if observation is enabled, false otherwise. - bool is_observing() const { return my_proxy.load(std::memory_order_relaxed) != nullptr; } - - //! Entry notification - /** Invoked from inside observe(true) call and whenever a worker enters the arena - this observer is associated with. 
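With the full task_group interface restored above, its intended usage pattern is short enough to sketch here; this is the conventional fork/join idiom, not code from this commit, and the cutoff value is arbitrary.

    #include <oneapi/tbb/task_group.h>

    int parallel_fib(int n) {
        if (n < 16) {                       // small problems computed serially
            int a = 0, b = 1;
            for (int i = 0; i < n; ++i) { int t = a + b; a = b; b = t; }
            return a;
        }
        int x = 0, y = 0;
        tbb::task_group g;
        g.run([&] { x = parallel_fib(n - 1); });           // spawned into the group
        g.run_and_wait([&] { y = parallel_fib(n - 2); });  // run here, then wait for the group
        return x + y;
    }

Both wait() and run_and_wait() report a task_group_status, so a caller can distinguish complete from canceled when cancellation is in play.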
If a thread is already in the arena when - the observer is activated, the entry notification is called before it - executes the first stolen task. **/ - virtual void on_scheduler_entry( bool /*is_worker*/ ) {} - - //! Exit notification - /** Invoked from inside observe(false) call and whenever a worker leaves the - arena this observer is associated with. **/ - virtual void on_scheduler_exit( bool /*is_worker*/ ) {} - - //! Construct local or global observer in inactive state (observation disabled). - /** For a local observer entry/exit notifications are invoked whenever a worker - thread joins/leaves the arena of the observer's owner thread. If a thread is - already in the arena when the observer is activated, the entry notification is - called before it executes the first stolen task. **/ - explicit task_scheduler_observer() = default; - - //! Construct local observer for a given arena in inactive state (observation disabled). - /** entry/exit notifications are invoked whenever a thread joins/leaves arena. - If a thread is already in the arena when the observer is activated, the entry notification - is called before it executes the first stolen task. **/ - explicit task_scheduler_observer(task_arena& a) : my_task_arena(&a) {} - - /** Destructor protects instance of the observer from concurrent notification. - It is recommended to disable observation before destructor of a derived class starts, - otherwise it can lead to concurrent notification callback on partly destroyed object **/ - virtual ~task_scheduler_observer() { - if (my_proxy.load(std::memory_order_relaxed)) { - observe(false); - } - } - - //! Enable or disable observation - /** Warning: concurrent invocations of this method are not safe. - Repeated calls with the same state are no-ops. **/ - void observe(bool state = true) { - if( state && !my_proxy.load(std::memory_order_relaxed) ) { - __TBB_ASSERT( my_busy_count.load(std::memory_order_relaxed) == 0, "Inconsistent state of task_scheduler_observer instance"); - } - r1::observe(*this, state); - } -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::task_scheduler_observer; -} -} // namespace tbb - - -#endif /* __TBB_task_scheduler_observer_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_task_scheduler_observer_H +#define __TBB_task_scheduler_observer_H + +#include "detail/_namespace_injection.h" +#include "task_arena.h" +#include <atomic> + +namespace tbb { +namespace detail { + +namespace d1 { +class task_scheduler_observer; +} + +namespace r1 { +class observer_proxy; +class observer_list; + +//! Enable or disable observation +/** For local observers the method can be used only when the current thread +has the task scheduler initialized or is attached to an arena. +Repeated calls with the same state are no-ops. 
**/ +void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool state = true); +} + +namespace d1 { +class task_scheduler_observer { + friend class r1::observer_proxy; + friend class r1::observer_list; + friend void r1::observe(d1::task_scheduler_observer&, bool); + + //! Pointer to the proxy holding this observer. + /** Observers are proxied by the scheduler to maintain persistent lists of them. **/ + std::atomic<r1::observer_proxy*> my_proxy{ nullptr }; + + //! Counter preventing the observer from being destroyed while in use by the scheduler. + /** Valid only when observation is on. **/ + std::atomic<intptr_t> my_busy_count{ 0 }; + + //! Contains task_arena pointer + task_arena* my_task_arena{ nullptr }; +public: + //! Returns true if observation is enabled, false otherwise. + bool is_observing() const { return my_proxy.load(std::memory_order_relaxed) != nullptr; } + + //! Entry notification + /** Invoked from inside observe(true) call and whenever a worker enters the arena + this observer is associated with. If a thread is already in the arena when + the observer is activated, the entry notification is called before it + executes the first stolen task. **/ + virtual void on_scheduler_entry( bool /*is_worker*/ ) {} + + //! Exit notification + /** Invoked from inside observe(false) call and whenever a worker leaves the + arena this observer is associated with. **/ + virtual void on_scheduler_exit( bool /*is_worker*/ ) {} + + //! Construct local or global observer in inactive state (observation disabled). + /** For a local observer entry/exit notifications are invoked whenever a worker + thread joins/leaves the arena of the observer's owner thread. If a thread is + already in the arena when the observer is activated, the entry notification is + called before it executes the first stolen task. **/ + explicit task_scheduler_observer() = default; + + //! Construct local observer for a given arena in inactive state (observation disabled). + /** entry/exit notifications are invoked whenever a thread joins/leaves arena. + If a thread is already in the arena when the observer is activated, the entry notification + is called before it executes the first stolen task. **/ + explicit task_scheduler_observer(task_arena& a) : my_task_arena(&a) {} + + /** Destructor protects instance of the observer from concurrent notification. + It is recommended to disable observation before destructor of a derived class starts, + otherwise it can lead to concurrent notification callback on partly destroyed object **/ + virtual ~task_scheduler_observer() { + if (my_proxy.load(std::memory_order_relaxed)) { + observe(false); + } + } + + //! Enable or disable observation + /** Warning: concurrent invocations of this method are not safe. + Repeated calls with the same state are no-ops. 
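To make the entry/exit hooks concrete, here is a minimal observer sketch. The class name and counter are invented for illustration; enabling observation in the constructor and disabling it before the derived object is torn down follows the recommendation in this header's comments.

    #include <atomic>
    #include <oneapi/tbb/task_arena.h>
    #include <oneapi/tbb/task_scheduler_observer.h>

    // Counts how many times threads enter the observed arena.
    class entry_counter : public tbb::task_scheduler_observer {
        std::atomic<int> m_entries{0};
    public:
        explicit entry_counter(tbb::task_arena& a)
            : tbb::task_scheduler_observer(a) {
            observe(true);                   // start receiving notifications
        }
        ~entry_counter() override {
            observe(false);                  // stop before members are destroyed
        }
        void on_scheduler_entry(bool /*is_worker*/) override { ++m_entries; }
        int entries() const { return m_entries.load(); }
    };

Typical use would be: construct a tbb::task_arena, construct the observer over it, run work via arena.execute(), then read entries().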
**/ + void observe(bool state = true) { + if( state && !my_proxy.load(std::memory_order_relaxed) ) { + __TBB_ASSERT( my_busy_count.load(std::memory_order_relaxed) == 0, "Inconsistent state of task_scheduler_observer instance"); + } + r1::observe(*this, state); + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::task_scheduler_observer; +} +} // namespace tbb + + +#endif /* __TBB_task_scheduler_observer_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h index 3da61a009d..1018a15793 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h @@ -1,126 +1,126 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_tbb_allocator_H -#define __TBB_tbb_allocator_H - -#include "oneapi/tbb/detail/_utils.h" -#include "detail/_namespace_injection.h" -#include <cstdlib> -#include <utility> - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -#error #include <memory_resource> -#endif - -namespace tbb { -namespace detail { - -namespace r1 { -void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size); -void __TBB_EXPORTED_FUNC deallocate_memory(void* p); -bool __TBB_EXPORTED_FUNC is_tbbmalloc_used(); -} - -namespace d1 { - -template<typename T> -class tbb_allocator { -public: - using value_type = T; - using propagate_on_container_move_assignment = std::true_type; - - //! Always defined for TBB containers (supported since C++17 for std containers) - using is_always_equal = std::true_type; - - //! Specifies current allocator - enum malloc_type { - scalable, - standard - }; - - tbb_allocator() = default; - template<typename U> tbb_allocator(const tbb_allocator<U>&) noexcept {} - - //! Allocate space for n objects. - __TBB_nodiscard T* allocate(std::size_t n) { - return static_cast<T*>(r1::allocate_memory(n * sizeof(value_type))); - } - - //! Free previously allocated block of memory. - void deallocate(T* p, std::size_t) { - r1::deallocate_memory(p); - } - - //! Returns current allocator - static malloc_type allocator_type() { - return r1::is_tbbmalloc_used() ? standard : scalable; - } - -#if TBB_ALLOCATOR_TRAITS_BROKEN - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using difference_type = std::ptrdiff_t; - using size_type = std::size_t; - template<typename U> struct rebind { - using other = tbb_allocator<U>; - }; - //! Largest value for which method allocate might succeed. - size_type max_size() const noexcept { - size_type max = ~(std::size_t(0)) / sizeof(value_type); - return (max > 0 ? max : 1); - } - template<typename U, typename... Args> - void construct(U *p, Args&&... 
args) - { ::new (p) U(std::forward<Args>(args)...); } - void destroy( pointer p ) { p->~value_type(); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } -#endif // TBB_ALLOCATOR_TRAITS_BROKEN -}; - -#if TBB_ALLOCATOR_TRAITS_BROKEN - template<> - class tbb_allocator<void> { - public: - using pointer = void*; - using const_pointer = const void*; - using value_type = void; - template<typename U> struct rebind { - using other = tbb_allocator<U>; - }; - }; -#endif - -template<typename T, typename U> -inline bool operator==(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return true; } - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template<typename T, typename U> -inline bool operator!=(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return false; } -#endif - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::tbb_allocator; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_tbb_allocator_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tbb_allocator_H +#define __TBB_tbb_allocator_H + +#include "oneapi/tbb/detail/_utils.h" +#include "detail/_namespace_injection.h" +#include <cstdlib> +#include <utility> + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size); +void __TBB_EXPORTED_FUNC deallocate_memory(void* p); +bool __TBB_EXPORTED_FUNC is_tbbmalloc_used(); +} + +namespace d1 { + +template<typename T> +class tbb_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers (supported since C++17 for std containers) + using is_always_equal = std::true_type; + + //! Specifies current allocator + enum malloc_type { + scalable, + standard + }; + + tbb_allocator() = default; + template<typename U> tbb_allocator(const tbb_allocator<U>&) noexcept {} + + //! Allocate space for n objects. + __TBB_nodiscard T* allocate(std::size_t n) { + return static_cast<T*>(r1::allocate_memory(n * sizeof(value_type))); + } + + //! Free previously allocated block of memory. + void deallocate(T* p, std::size_t) { + r1::deallocate_memory(p); + } + + //! Returns current allocator + static malloc_type allocator_type() { + return r1::is_tbbmalloc_used() ? standard : scalable; + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = tbb_allocator<U>; + }; + //! Largest value for which method allocate might succeed. + size_type max_size() const noexcept { + size_type max = ~(std::size_t(0)) / sizeof(value_type); + return (max > 0 ? 
max : 1); + } + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new (p) U(std::forward<Args>(args)...); } + void destroy( pointer p ) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class tbb_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = tbb_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +inline bool operator==(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +inline bool operator!=(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return false; } +#endif + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::tbb_allocator; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_tbb_allocator_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h b/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h index 0ba38f215e..bb7c143ee5 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h +++ b/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h @@ -1,65 +1,65 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -/* -Replacing the standard memory allocation routines in Microsoft* C/C++ RTL -(malloc/free, global new/delete, etc.) with the TBB memory allocator. - -Include the following header to a source of any binary which is loaded during -application startup - -#include "oneapi/tbb/tbbmalloc_proxy.h" - -or add following parameters to the linker options for the binary which is -loaded during application startup. It can be either exe-file or dll. - -For win32 -tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy" -win64 -tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy" -*/ - -#ifndef __TBB_tbbmalloc_proxy_H -#define __TBB_tbbmalloc_proxy_H - -#if _MSC_VER - -#ifdef _DEBUG - #pragma comment(lib, "tbbmalloc_proxy_debug.lib") -#else - #pragma comment(lib, "tbbmalloc_proxy.lib") -#endif - -#if defined(_WIN64) - #pragma comment(linker, "/include:__TBB_malloc_proxy") -#else - #pragma comment(linker, "/include:___TBB_malloc_proxy") -#endif - -#else -/* Primarily to support MinGW */ - -extern "C" void __TBB_malloc_proxy(); -struct __TBB_malloc_proxy_caller { - __TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); } -} volatile __TBB_malloc_proxy_helper_object; - -#endif // _MSC_VER - -/* Public Windows API */ -extern "C" int TBB_malloc_replacement_log(char *** function_replacement_log_ptr); - -#endif //__TBB_tbbmalloc_proxy_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
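Since the tbb_allocator template restored just above is a standard-conforming allocator, dropping it into an STL container is the typical use; a minimal sketch, relying only on the documented fallback behavior of this header:

    #include <vector>
    #include <oneapi/tbb/tbb_allocator.h>

    int main() {
        // A std::vector whose storage comes through tbb_allocator: it uses the
        // scalable tbbmalloc library when that is loaded, and otherwise falls
        // back to the standard allocation routines.
        std::vector<int, tbb::tbb_allocator<int>> v;
        v.reserve(1024);
        for (int i = 0; i < 1024; ++i) v.push_back(i);
        return v.back() == 1023 ? 0 : 1;
    }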
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* +Replacing the standard memory allocation routines in Microsoft* C/C++ RTL +(malloc/free, global new/delete, etc.) with the TBB memory allocator. + +Include the following header to a source of any binary which is loaded during +application startup + +#include "oneapi/tbb/tbbmalloc_proxy.h" + +or add following parameters to the linker options for the binary which is +loaded during application startup. It can be either exe-file or dll. + +For win32 +tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy" +win64 +tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy" +*/ + +#ifndef __TBB_tbbmalloc_proxy_H +#define __TBB_tbbmalloc_proxy_H + +#if _MSC_VER + +#ifdef _DEBUG + #pragma comment(lib, "tbbmalloc_proxy_debug.lib") +#else + #pragma comment(lib, "tbbmalloc_proxy.lib") +#endif + +#if defined(_WIN64) + #pragma comment(linker, "/include:__TBB_malloc_proxy") +#else + #pragma comment(linker, "/include:___TBB_malloc_proxy") +#endif + +#else +/* Primarily to support MinGW */ + +extern "C" void __TBB_malloc_proxy(); +struct __TBB_malloc_proxy_caller { + __TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); } +} volatile __TBB_malloc_proxy_helper_object; + +#endif // _MSC_VER + +/* Public Windows API */ +extern "C" int TBB_malloc_replacement_log(char *** function_replacement_log_ptr); + +#endif //__TBB_tbbmalloc_proxy_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/tick_count.h b/contrib/libs/tbb/include/oneapi/tbb/tick_count.h index 2caa56ba18..96fbf3d5d1 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/tick_count.h +++ b/contrib/libs/tbb/include/oneapi/tbb/tick_count.h @@ -1,99 +1,99 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_tick_count_H -#define __TBB_tick_count_H - -#include <chrono> - -#include "detail/_namespace_injection.h" - -namespace tbb { -namespace detail { -namespace d1 { - - -//! Absolute timestamp -/** @ingroup timing */ -class tick_count { -public: - using clock_type = typename std::conditional<std::chrono::high_resolution_clock::is_steady, - std::chrono::high_resolution_clock, std::chrono::steady_clock>::type; - - //! Relative time interval. - class interval_t : public clock_type::duration { - public: - //! Construct a time interval representing zero time duration - interval_t() : clock_type::duration(clock_type::duration::zero()) {} - - //! Construct a time interval representing sec seconds time duration - explicit interval_t( double sec ) - : clock_type::duration(std::chrono::duration_cast<clock_type::duration>(std::chrono::duration<double>(sec))) {} - - //! 
Return the length of a time interval in seconds - double seconds() const { - return std::chrono::duration_cast<std::chrono::duration<double>>(*this).count(); - } - - //! Extract the intervals from the tick_counts and subtract them. - friend interval_t operator-( const tick_count& t1, const tick_count& t0 ); - - //! Add two intervals. - friend interval_t operator+( const interval_t& i, const interval_t& j ) { - return interval_t(std::chrono::operator+(i, j)); - } - - //! Subtract two intervals. - friend interval_t operator-( const interval_t& i, const interval_t& j ) { - return interval_t(std::chrono::operator-(i, j)); - } - - private: - explicit interval_t( clock_type::duration value_ ) : clock_type::duration(value_) {} - }; - - tick_count() = default; - - //! Return current time. - static tick_count now() { - return clock_type::now(); - } - - //! Subtract two timestamps to get the time interval between - friend interval_t operator-( const tick_count& t1, const tick_count& t0 ) { - return tick_count::interval_t(t1.my_time_point - t0.my_time_point); - } - - //! Return the resolution of the clock in seconds per tick. - static double resolution() { - return static_cast<double>(interval_t::period::num) / interval_t::period::den; - } - -private: - clock_type::time_point my_time_point; - tick_count( clock_type::time_point tp ) : my_time_point(tp) {} -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::tick_count; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_tick_count_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tick_count_H +#define __TBB_tick_count_H + +#include <chrono> + +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + + +//! Absolute timestamp +/** @ingroup timing */ +class tick_count { +public: + using clock_type = typename std::conditional<std::chrono::high_resolution_clock::is_steady, + std::chrono::high_resolution_clock, std::chrono::steady_clock>::type; + + //! Relative time interval. + class interval_t : public clock_type::duration { + public: + //! Construct a time interval representing zero time duration + interval_t() : clock_type::duration(clock_type::duration::zero()) {} + + //! Construct a time interval representing sec seconds time duration + explicit interval_t( double sec ) + : clock_type::duration(std::chrono::duration_cast<clock_type::duration>(std::chrono::duration<double>(sec))) {} + + //! Return the length of a time interval in seconds + double seconds() const { + return std::chrono::duration_cast<std::chrono::duration<double>>(*this).count(); + } + + //! Extract the intervals from the tick_counts and subtract them. + friend interval_t operator-( const tick_count& t1, const tick_count& t0 ); + + //! Add two intervals. + friend interval_t operator+( const interval_t& i, const interval_t& j ) { + return interval_t(std::chrono::operator+(i, j)); + } + + //! Subtract two intervals. 
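tick_count wraps the steady std::chrono clock chosen above, so timing a region takes two calls to now() and one subtraction. A small self-contained sketch (the timed loop is arbitrary filler):

    #include <cstdio>
    #include <oneapi/tbb/tick_count.h>

    int main() {
        tbb::tick_count t0 = tbb::tick_count::now();
        double acc = 0.0;
        for (int i = 0; i < 10000000; ++i) acc += i * 0.5;   // work being timed
        tbb::tick_count t1 = tbb::tick_count::now();
        tbb::tick_count::interval_t elapsed = t1 - t0;        // interval_t via operator-
        std::printf("acc=%g, elapsed=%.6f s (resolution %.2e s)\n",
                    acc, elapsed.seconds(), tbb::tick_count::resolution());
    }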
+ friend interval_t operator-( const interval_t& i, const interval_t& j ) { + return interval_t(std::chrono::operator-(i, j)); + } + + private: + explicit interval_t( clock_type::duration value_ ) : clock_type::duration(value_) {} + }; + + tick_count() = default; + + //! Return current time. + static tick_count now() { + return clock_type::now(); + } + + //! Subtract two timestamps to get the time interval between + friend interval_t operator-( const tick_count& t1, const tick_count& t0 ) { + return tick_count::interval_t(t1.my_time_point - t0.my_time_point); + } + + //! Return the resolution of the clock in seconds per tick. + static double resolution() { + return static_cast<double>(interval_t::period::num) / interval_t::period::den; + } + +private: + clock_type::time_point my_time_point; + tick_count( clock_type::time_point tp ) : my_time_point(tp) {} +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::tick_count; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_tick_count_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/version.h b/contrib/libs/tbb/include/oneapi/tbb/version.h index 1e3507cd9b..a791937df8 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/version.h +++ b/contrib/libs/tbb/include/oneapi/tbb/version.h @@ -1,108 +1,108 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_version_H -#define __TBB_version_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -// Product version -#define TBB_VERSION_MAJOR 2021 -// Update version -#define TBB_VERSION_MINOR 2 -// "Patch" version for custom releases -#define TBB_VERSION_PATCH 0 -// Suffix string -#define __TBB_VERSION_SUFFIX "" -// Full official version string -#define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." __TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX - -// OneAPI oneTBB specification version -#define ONETBB_SPEC_VERSION "1.0" -// Full interface version -#define TBB_INTERFACE_VERSION 12020 -// Major interface version -#define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) -// Minor interface version -#define TBB_INTERFACE_VERSION_MINOR (TBB_INTERFACE_VERSION%1000/10) - -// The binary compatibility version -// To be used in SONAME, manifests, etc. -#define __TBB_BINARY_VERSION 12 - -//! 
TBB_VERSION support -#ifndef ENDL -#define ENDL "\n" -#endif - -//TBB_REVAMP_TODO: consider enabling version_string.ver generation -//TBB_REVAMP_TODO: #include "version_string.ver" - -#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION ENDL -#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING ENDL -#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) ENDL - -#ifndef TBB_USE_DEBUG - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" ENDL -#elif TBB_USE_DEBUG==0 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" ENDL -#elif TBB_USE_DEBUG==1 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" ENDL -#elif TBB_USE_DEBUG==2 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" ENDL -#else - #error Unexpected value for TBB_USE_DEBUG -#endif - -#ifndef TBB_USE_ASSERT - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" ENDL -#elif TBB_USE_ASSERT==0 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" ENDL -#elif TBB_USE_ASSERT==1 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" ENDL -#elif TBB_USE_ASSERT==2 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" ENDL -#else - #error Unexpected value for TBB_USE_ASSERT -#endif - -#define TBB_VERSION_STRINGS_P(N) \ - __TBB_ONETBB_SPEC_VERSION(N) \ - __TBB_VERSION_NUMBER(N) \ - __TBB_INTERFACE_VERSION_NUMBER(N) \ - __TBB_VERSION_USE_DEBUG(N) \ - __TBB_VERSION_USE_ASSERT(N) - -#define TBB_VERSION_STRINGS TBB_VERSION_STRINGS_P(oneTBB) -#define TBBMALLOC_VERSION_STRINGS TBB_VERSION_STRINGS_P(TBBmalloc) - -//! The function returns the version string for the Intel(R) oneAPI Threading Building Blocks (oneTBB) -//! shared library being used. -/** - * The returned pointer is an address of a string in the shared library. - * It can be different than the TBB_VERSION_STRING obtained at compile time. - */ -extern "C" const char* __TBB_EXPORTED_FUNC TBB_runtime_version(); - -//! The function returns the interface version of the oneTBB shared library being used. -/** - * The returned version is determined at runtime, not at compile/link time. - * It can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. - */ -extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); - -#endif // __TBB_version_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_version_H +#define __TBB_version_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +// Product version +#define TBB_VERSION_MAJOR 2021 +// Update version +#define TBB_VERSION_MINOR 2 +// "Patch" version for custom releases +#define TBB_VERSION_PATCH 0 +// Suffix string +#define __TBB_VERSION_SUFFIX "" +// Full official version string +#define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." 
__TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX + +// OneAPI oneTBB specification version +#define ONETBB_SPEC_VERSION "1.0" +// Full interface version +#define TBB_INTERFACE_VERSION 12020 +// Major interface version +#define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) +// Minor interface version +#define TBB_INTERFACE_VERSION_MINOR (TBB_INTERFACE_VERSION%1000/10) + +// The binary compatibility version +// To be used in SONAME, manifests, etc. +#define __TBB_BINARY_VERSION 12 + +//! TBB_VERSION support +#ifndef ENDL +#define ENDL "\n" +#endif + +//TBB_REVAMP_TODO: consider enabling version_string.ver generation +//TBB_REVAMP_TODO: #include "version_string.ver" + +#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION ENDL +#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING ENDL +#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) ENDL + +#ifndef TBB_USE_DEBUG + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" ENDL +#elif TBB_USE_DEBUG==0 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" ENDL +#elif TBB_USE_DEBUG==1 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" ENDL +#elif TBB_USE_DEBUG==2 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" ENDL +#else + #error Unexpected value for TBB_USE_DEBUG +#endif + +#ifndef TBB_USE_ASSERT + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" ENDL +#elif TBB_USE_ASSERT==0 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" ENDL +#elif TBB_USE_ASSERT==1 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" ENDL +#elif TBB_USE_ASSERT==2 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" ENDL +#else + #error Unexpected value for TBB_USE_ASSERT +#endif + +#define TBB_VERSION_STRINGS_P(N) \ + __TBB_ONETBB_SPEC_VERSION(N) \ + __TBB_VERSION_NUMBER(N) \ + __TBB_INTERFACE_VERSION_NUMBER(N) \ + __TBB_VERSION_USE_DEBUG(N) \ + __TBB_VERSION_USE_ASSERT(N) + +#define TBB_VERSION_STRINGS TBB_VERSION_STRINGS_P(oneTBB) +#define TBBMALLOC_VERSION_STRINGS TBB_VERSION_STRINGS_P(TBBmalloc) + +//! The function returns the version string for the Intel(R) oneAPI Threading Building Blocks (oneTBB) +//! shared library being used. +/** + * The returned pointer is an address of a string in the shared library. + * It can be different than the TBB_VERSION_STRING obtained at compile time. + */ +extern "C" const char* __TBB_EXPORTED_FUNC TBB_runtime_version(); + +//! The function returns the interface version of the oneTBB shared library being used. +/** + * The returned version is determined at runtime, not at compile/link time. + * It can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. + */ +extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); + +#endif // __TBB_version_H diff --git a/contrib/libs/tbb/include/tbb/blocked_range.h b/contrib/libs/tbb/include/tbb/blocked_range.h index 316ec01ba9..40b0d76261 100644 --- a/contrib/libs/tbb/include/tbb/blocked_range.h +++ b/contrib/libs/tbb/include/tbb/blocked_range.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
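The version macros and runtime query functions restored in oneapi/tbb/version.h above are commonly printed together to diagnose mismatches between the headers a program was compiled with and the shared library it loads at run time; a brief sketch:

    #include <cstdio>
    #include <oneapi/tbb/version.h>

    int main() {
        // Compile-time values come from the macros; run-time values come from
        // the loaded shared library and may differ after an upgrade.
        std::printf("compiled against: %s (interface %d)\n",
                    TBB_VERSION_STRING, TBB_INTERFACE_VERSION);
        std::printf("running against : %s (interface %d)\n",
                    TBB_runtime_version(), TBB_runtime_interface_version());
    }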
*/ -#include "../oneapi/tbb/blocked_range.h" +#include "../oneapi/tbb/blocked_range.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_range2d.h b/contrib/libs/tbb/include/tbb/blocked_range2d.h index 1e13240787..62b0de3886 100644 --- a/contrib/libs/tbb/include/tbb/blocked_range2d.h +++ b/contrib/libs/tbb/include/tbb/blocked_range2d.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/blocked_range2d.h" +#include "../oneapi/tbb/blocked_range2d.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_range3d.h b/contrib/libs/tbb/include/tbb/blocked_range3d.h index 3321979660..c1d949ed87 100644 --- a/contrib/libs/tbb/include/tbb/blocked_range3d.h +++ b/contrib/libs/tbb/include/tbb/blocked_range3d.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/blocked_range3d.h" +#include "../oneapi/tbb/blocked_range3d.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_rangeNd.h b/contrib/libs/tbb/include/tbb/blocked_rangeNd.h index 0c0fb7303a..654f617ff6 100644 --- a/contrib/libs/tbb/include/tbb/blocked_rangeNd.h +++ b/contrib/libs/tbb/include/tbb/blocked_rangeNd.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2017-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/blocked_rangeNd.h" +/* + Copyright (c) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/blocked_rangeNd.h" diff --git a/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h b/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h index 2d3c66a74a..c257a5a1da 100644 --- a/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h +++ b/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/cache_aligned_allocator.h" +#include "../oneapi/tbb/cache_aligned_allocator.h" diff --git a/contrib/libs/tbb/include/tbb/combinable.h b/contrib/libs/tbb/include/tbb/combinable.h index 50295ec72a..71b60e8c01 100644 --- a/contrib/libs/tbb/include/tbb/combinable.h +++ b/contrib/libs/tbb/include/tbb/combinable.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/combinable.h" +#include "../oneapi/tbb/combinable.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_hash_map.h b/contrib/libs/tbb/include/tbb/concurrent_hash_map.h index 68652c5961..4099be202b 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_hash_map.h +++ b/contrib/libs/tbb/include/tbb/concurrent_hash_map.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_hash_map.h" +#include "../oneapi/tbb/concurrent_hash_map.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h b/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h index 2757a234be..4c8cc6d579 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h +++ b/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_lru_cache.h" +#include "../oneapi/tbb/concurrent_lru_cache.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_map.h b/contrib/libs/tbb/include/tbb/concurrent_map.h index 84f59d7e66..3d5c3e80a5 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_map.h +++ b/contrib/libs/tbb/include/tbb/concurrent_map.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/concurrent_map.h" +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "../oneapi/tbb/concurrent_map.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h b/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h index 3b27130b1e..c2db1cac41 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h +++ b/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_priority_queue.h" +#include "../oneapi/tbb/concurrent_priority_queue.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_queue.h b/contrib/libs/tbb/include/tbb/concurrent_queue.h index d81a58b887..68580e5c7c 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_queue.h +++ b/contrib/libs/tbb/include/tbb/concurrent_queue.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_queue.h" +#include "../oneapi/tbb/concurrent_queue.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_set.h b/contrib/libs/tbb/include/tbb/concurrent_set.h index cf4652f597..f822ada28f 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_set.h +++ b/contrib/libs/tbb/include/tbb/concurrent_set.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/concurrent_set.h" +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/concurrent_set.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h b/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h index 9475c06cf3..8bd7b8d954 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h +++ b/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/concurrent_unordered_map.h" +#include "../oneapi/tbb/concurrent_unordered_map.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h b/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h index 81a8f9c37d..d8527b4fa5 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h +++ b/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_unordered_set.h" +#include "../oneapi/tbb/concurrent_unordered_set.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_vector.h b/contrib/libs/tbb/include/tbb/concurrent_vector.h index c1fc97c623..9e72476cc5 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_vector.h +++ b/contrib/libs/tbb/include/tbb/concurrent_vector.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_vector.h" +#include "../oneapi/tbb/concurrent_vector.h" diff --git a/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h b/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h index 9d6050d64f..d36e50038d 100644 --- a/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h +++ b/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/enumerable_thread_specific.h" +#include "../oneapi/tbb/enumerable_thread_specific.h" diff --git a/contrib/libs/tbb/include/tbb/flow_graph.h b/contrib/libs/tbb/include/tbb/flow_graph.h index 40da468fe0..b337ae80a3 100644 --- a/contrib/libs/tbb/include/tbb/flow_graph.h +++ b/contrib/libs/tbb/include/tbb/flow_graph.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/flow_graph.h" +#include "../oneapi/tbb/flow_graph.h" diff --git a/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h b/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h index cd9dc2967e..a24a4ea744 100644 --- a/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h +++ b/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/flow_graph_abstractions.h" +#include "../oneapi/tbb/flow_graph_abstractions.h" diff --git a/contrib/libs/tbb/include/tbb/global_control.h b/contrib/libs/tbb/include/tbb/global_control.h index 2688996ecb..0c87e9a051 100644 --- a/contrib/libs/tbb/include/tbb/global_control.h +++ b/contrib/libs/tbb/include/tbb/global_control.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/global_control.h" +#include "../oneapi/tbb/global_control.h" diff --git a/contrib/libs/tbb/include/tbb/info.h b/contrib/libs/tbb/include/tbb/info.h index 02d331650e..7a42b6ce6b 100644 --- a/contrib/libs/tbb/include/tbb/info.h +++ b/contrib/libs/tbb/include/tbb/info.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/info.h" +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/info.h" diff --git a/contrib/libs/tbb/include/tbb/memory_pool.h b/contrib/libs/tbb/include/tbb/memory_pool.h index cefe96e36d..3e971d4e0f 100644 --- a/contrib/libs/tbb/include/tbb/memory_pool.h +++ b/contrib/libs/tbb/include/tbb/memory_pool.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/memory_pool.h" +#include "../oneapi/tbb/memory_pool.h" diff --git a/contrib/libs/tbb/include/tbb/null_mutex.h b/contrib/libs/tbb/include/tbb/null_mutex.h index 63218bf061..8d1bb5263e 100644 --- a/contrib/libs/tbb/include/tbb/null_mutex.h +++ b/contrib/libs/tbb/include/tbb/null_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/null_mutex.h" +#include "../oneapi/tbb/null_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/null_rw_mutex.h b/contrib/libs/tbb/include/tbb/null_rw_mutex.h index 71c42fe26a..44c8f55983 100644 --- a/contrib/libs/tbb/include/tbb/null_rw_mutex.h +++ b/contrib/libs/tbb/include/tbb/null_rw_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/null_rw_mutex.h" +#include "../oneapi/tbb/null_rw_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_for.h b/contrib/libs/tbb/include/tbb/parallel_for.h index fea1d1b9f5..3403240263 100644 --- a/contrib/libs/tbb/include/tbb/parallel_for.h +++ b/contrib/libs/tbb/include/tbb/parallel_for.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_for.h" +#include "../oneapi/tbb/parallel_for.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_for_each.h b/contrib/libs/tbb/include/tbb/parallel_for_each.h index 27c2ab1727..d5e0c2d08a 100644 --- a/contrib/libs/tbb/include/tbb/parallel_for_each.h +++ b/contrib/libs/tbb/include/tbb/parallel_for_each.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_for_each.h" +#include "../oneapi/tbb/parallel_for_each.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_invoke.h b/contrib/libs/tbb/include/tbb/parallel_invoke.h index 6c21100e70..01f15cfe84 100644 --- a/contrib/libs/tbb/include/tbb/parallel_invoke.h +++ b/contrib/libs/tbb/include/tbb/parallel_invoke.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_invoke.h" +#include "../oneapi/tbb/parallel_invoke.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_pipeline.h b/contrib/libs/tbb/include/tbb/parallel_pipeline.h index aceee49f8a..8f8c821946 100644 --- a/contrib/libs/tbb/include/tbb/parallel_pipeline.h +++ b/contrib/libs/tbb/include/tbb/parallel_pipeline.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#include "../oneapi/tbb/parallel_pipeline.h" +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/parallel_pipeline.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_reduce.h b/contrib/libs/tbb/include/tbb/parallel_reduce.h index 83658755a8..c0161c8023 100644 --- a/contrib/libs/tbb/include/tbb/parallel_reduce.h +++ b/contrib/libs/tbb/include/tbb/parallel_reduce.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_reduce.h" +#include "../oneapi/tbb/parallel_reduce.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_scan.h b/contrib/libs/tbb/include/tbb/parallel_scan.h index 682032a561..f8dc1e5111 100644 --- a/contrib/libs/tbb/include/tbb/parallel_scan.h +++ b/contrib/libs/tbb/include/tbb/parallel_scan.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_scan.h" +#include "../oneapi/tbb/parallel_scan.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_sort.h b/contrib/libs/tbb/include/tbb/parallel_sort.h index b238e6caa4..2eec4913d4 100644 --- a/contrib/libs/tbb/include/tbb/parallel_sort.h +++ b/contrib/libs/tbb/include/tbb/parallel_sort.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_sort.h" +#include "../oneapi/tbb/parallel_sort.h" diff --git a/contrib/libs/tbb/include/tbb/partitioner.h b/contrib/libs/tbb/include/tbb/partitioner.h index b959e35a2f..3cd0e32fc3 100644 --- a/contrib/libs/tbb/include/tbb/partitioner.h +++ b/contrib/libs/tbb/include/tbb/partitioner.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/partitioner.h" +#include "../oneapi/tbb/partitioner.h" diff --git a/contrib/libs/tbb/include/tbb/profiling.h b/contrib/libs/tbb/include/tbb/profiling.h index c7cea9c590..f4ebf88164 100644 --- a/contrib/libs/tbb/include/tbb/profiling.h +++ b/contrib/libs/tbb/include/tbb/profiling.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/profiling.h" +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/profiling.h" diff --git a/contrib/libs/tbb/include/tbb/queuing_mutex.h b/contrib/libs/tbb/include/tbb/queuing_mutex.h index ad031e4eb7..4766cf92c1 100644 --- a/contrib/libs/tbb/include/tbb/queuing_mutex.h +++ b/contrib/libs/tbb/include/tbb/queuing_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/queuing_mutex.h" +#include "../oneapi/tbb/queuing_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h b/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h index 203727ccc5..e4d4dd66f3 100644 --- a/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h +++ b/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/queuing_rw_mutex.h" +#include "../oneapi/tbb/queuing_rw_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/scalable_allocator.h b/contrib/libs/tbb/include/tbb/scalable_allocator.h index 5c654ebd68..d3d7b9b9db 100644 --- a/contrib/libs/tbb/include/tbb/scalable_allocator.h +++ b/contrib/libs/tbb/include/tbb/scalable_allocator.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/scalable_allocator.h" +#include "../oneapi/tbb/scalable_allocator.h" diff --git a/contrib/libs/tbb/include/tbb/spin_mutex.h b/contrib/libs/tbb/include/tbb/spin_mutex.h index 1a6f7f077f..a092d73edb 100644 --- a/contrib/libs/tbb/include/tbb/spin_mutex.h +++ b/contrib/libs/tbb/include/tbb/spin_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/spin_mutex.h" +#include "../oneapi/tbb/spin_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/spin_rw_mutex.h b/contrib/libs/tbb/include/tbb/spin_rw_mutex.h index d36282b486..a9a9685c20 100644 --- a/contrib/libs/tbb/include/tbb/spin_rw_mutex.h +++ b/contrib/libs/tbb/include/tbb/spin_rw_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/spin_rw_mutex.h" +#include "../oneapi/tbb/spin_rw_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/task.h b/contrib/libs/tbb/include/tbb/task.h index 9be95b0d69..dae096777b 100644 --- a/contrib/libs/tbb/include/tbb/task.h +++ b/contrib/libs/tbb/include/tbb/task.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/task.h" +#include "../oneapi/tbb/task.h" diff --git a/contrib/libs/tbb/include/tbb/task_arena.h b/contrib/libs/tbb/include/tbb/task_arena.h index f6e34b3e6d..600e223089 100644 --- a/contrib/libs/tbb/include/tbb/task_arena.h +++ b/contrib/libs/tbb/include/tbb/task_arena.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/task_arena.h" +#include "../oneapi/tbb/task_arena.h" diff --git a/contrib/libs/tbb/include/tbb/task_group.h b/contrib/libs/tbb/include/tbb/task_group.h index 2f02503971..76e4ebb0ce 100644 --- a/contrib/libs/tbb/include/tbb/task_group.h +++ b/contrib/libs/tbb/include/tbb/task_group.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/task_group.h" +#include "../oneapi/tbb/task_group.h" diff --git a/contrib/libs/tbb/include/tbb/task_scheduler_observer.h b/contrib/libs/tbb/include/tbb/task_scheduler_observer.h index 9236f4cdf4..51740119e0 100644 --- a/contrib/libs/tbb/include/tbb/task_scheduler_observer.h +++ b/contrib/libs/tbb/include/tbb/task_scheduler_observer.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/task_scheduler_observer.h" +#include "../oneapi/tbb/task_scheduler_observer.h" diff --git a/contrib/libs/tbb/include/tbb/tbb.h b/contrib/libs/tbb/include/tbb/tbb.h index e443b8f1ca..a3383ace99 100644 --- a/contrib/libs/tbb/include/tbb/tbb.h +++ b/contrib/libs/tbb/include/tbb/tbb.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb.h" +#include "../oneapi/tbb.h" diff --git a/contrib/libs/tbb/include/tbb/tbb_allocator.h b/contrib/libs/tbb/include/tbb/tbb_allocator.h index 81ab9d33b5..16210cb8d3 100644 --- a/contrib/libs/tbb/include/tbb/tbb_allocator.h +++ b/contrib/libs/tbb/include/tbb/tbb_allocator.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/tbb_allocator.h" +#include "../oneapi/tbb/tbb_allocator.h" diff --git a/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h b/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h index 93eaa18e80..373ba3777a 100644 --- a/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h +++ b/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/tbbmalloc_proxy.h" +#include "../oneapi/tbb/tbbmalloc_proxy.h" diff --git a/contrib/libs/tbb/include/tbb/tick_count.h b/contrib/libs/tbb/include/tbb/tick_count.h index 170074aebb..f6f45ba52e 100644 --- a/contrib/libs/tbb/include/tbb/tick_count.h +++ b/contrib/libs/tbb/include/tbb/tick_count.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/tick_count.h" +#include "../oneapi/tbb/tick_count.h" diff --git a/contrib/libs/tbb/include/tbb/version.h b/contrib/libs/tbb/include/tbb/version.h index cd13a83a15..6d4d78ff46 100644 --- a/contrib/libs/tbb/include/tbb/version.h +++ b/contrib/libs/tbb/include/tbb/version.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/version.h" +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/version.h" |