author | Stanislav Kirillov <staskirillov@gmail.com> | 2022-02-10 16:46:07 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:07 +0300
commit | 92fe2b1e7bc79f7b95adef61714fc003f6ea4a1c (patch)
tree | 817034f4ca57c9f841bb047ec94630c2e78a2b1d /contrib/libs/tbb/include
parent | 53c76da6d9f6cc5a17f6029df396f0e3bc1ff47d (diff)
download | ydb-92fe2b1e7bc79f7b95adef61714fc003f6ea4a1c.tar.gz
Restoring authorship annotation for Stanislav Kirillov <staskirillov@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/tbb/include')
129 files changed, 31998 insertions, 31998 deletions
diff --git a/contrib/libs/tbb/include/oneapi/tbb.h b/contrib/libs/tbb/include/oneapi/tbb.h index 1ca41dc516..b51eaf90f6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb.h +++ b/contrib/libs/tbb/include/oneapi/tbb.h @@ -1,73 +1,73 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_tbb_H -#define __TBB_tbb_H - -/** - This header bulk-includes declarations or definitions of all the functionality - provided by TBB (save for tbbmalloc and 3rd party dependent headers). - - If you use only a few TBB constructs, consider including specific headers only. - Any header listed below can be included independently of others. -**/ - -#include "oneapi/tbb/blocked_range.h" -#include "oneapi/tbb/blocked_range2d.h" -#include "oneapi/tbb/blocked_range3d.h" -#if TBB_PREVIEW_BLOCKED_RANGE_ND -#include "tbb/blocked_rangeNd.h" -#endif -#include "oneapi/tbb/cache_aligned_allocator.h" -#include "oneapi/tbb/combinable.h" -#include "oneapi/tbb/concurrent_hash_map.h" -#if TBB_PREVIEW_CONCURRENT_LRU_CACHE -#include "tbb/concurrent_lru_cache.h" -#endif -#include "oneapi/tbb/concurrent_priority_queue.h" -#include "oneapi/tbb/concurrent_queue.h" -#include "oneapi/tbb/concurrent_unordered_map.h" -#include "oneapi/tbb/concurrent_unordered_set.h" -#include "oneapi/tbb/concurrent_map.h" -#include "oneapi/tbb/concurrent_set.h" -#include "oneapi/tbb/concurrent_vector.h" -#include "oneapi/tbb/enumerable_thread_specific.h" -#include "oneapi/tbb/flow_graph.h" -#include "oneapi/tbb/global_control.h" -#include "oneapi/tbb/info.h" -#include "oneapi/tbb/null_mutex.h" -#include "oneapi/tbb/null_rw_mutex.h" -#include "oneapi/tbb/parallel_for.h" -#include "oneapi/tbb/parallel_for_each.h" -#include "oneapi/tbb/parallel_invoke.h" -#include "oneapi/tbb/parallel_pipeline.h" -#include "oneapi/tbb/parallel_reduce.h" -#include "oneapi/tbb/parallel_scan.h" -#include "oneapi/tbb/parallel_sort.h" -#include "oneapi/tbb/partitioner.h" -#include "oneapi/tbb/queuing_mutex.h" -#include "oneapi/tbb/queuing_rw_mutex.h" -#include "oneapi/tbb/spin_mutex.h" -#include "oneapi/tbb/spin_rw_mutex.h" -#include "oneapi/tbb/task.h" -#include "oneapi/tbb/task_arena.h" -#include "oneapi/tbb/task_group.h" -#include "oneapi/tbb/task_scheduler_observer.h" -#include "oneapi/tbb/tbb_allocator.h" -#include "oneapi/tbb/tick_count.h" -#include "oneapi/tbb/version.h" - -#endif /* __TBB_tbb_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_tbb_H +#define __TBB_tbb_H + +/** + This header bulk-includes declarations or definitions of all the functionality + provided by TBB (save for tbbmalloc and 3rd party dependent headers). + + If you use only a few TBB constructs, consider including specific headers only. + Any header listed below can be included independently of others. +**/ + +#include "oneapi/tbb/blocked_range.h" +#include "oneapi/tbb/blocked_range2d.h" +#include "oneapi/tbb/blocked_range3d.h" +#if TBB_PREVIEW_BLOCKED_RANGE_ND +#include "tbb/blocked_rangeNd.h" +#endif +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/combinable.h" +#include "oneapi/tbb/concurrent_hash_map.h" +#if TBB_PREVIEW_CONCURRENT_LRU_CACHE +#include "tbb/concurrent_lru_cache.h" +#endif +#include "oneapi/tbb/concurrent_priority_queue.h" +#include "oneapi/tbb/concurrent_queue.h" +#include "oneapi/tbb/concurrent_unordered_map.h" +#include "oneapi/tbb/concurrent_unordered_set.h" +#include "oneapi/tbb/concurrent_map.h" +#include "oneapi/tbb/concurrent_set.h" +#include "oneapi/tbb/concurrent_vector.h" +#include "oneapi/tbb/enumerable_thread_specific.h" +#include "oneapi/tbb/flow_graph.h" +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/info.h" +#include "oneapi/tbb/null_mutex.h" +#include "oneapi/tbb/null_rw_mutex.h" +#include "oneapi/tbb/parallel_for.h" +#include "oneapi/tbb/parallel_for_each.h" +#include "oneapi/tbb/parallel_invoke.h" +#include "oneapi/tbb/parallel_pipeline.h" +#include "oneapi/tbb/parallel_reduce.h" +#include "oneapi/tbb/parallel_scan.h" +#include "oneapi/tbb/parallel_sort.h" +#include "oneapi/tbb/partitioner.h" +#include "oneapi/tbb/queuing_mutex.h" +#include "oneapi/tbb/queuing_rw_mutex.h" +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/spin_rw_mutex.h" +#include "oneapi/tbb/task.h" +#include "oneapi/tbb/task_arena.h" +#include "oneapi/tbb/task_group.h" +#include "oneapi/tbb/task_scheduler_observer.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/tick_count.h" +#include "oneapi/tbb/version.h" + +#endif /* __TBB_tbb_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h index f6612fb4e3..f69e8bb3fe 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h @@ -1,163 +1,163 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_blocked_range_H -#define __TBB_blocked_range_H - -#include <cstddef> - -#include "detail/_range_common.h" -#include "detail/_namespace_injection.h" - -#include "version.h" - -namespace tbb { -namespace detail { -namespace d1 { - -/** \page range_req Requirements on range concept - Class \c R implementing the concept of range must define: - - \code R::R( const R& ); \endcode Copy constructor - - \code R::~R(); \endcode Destructor - - \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges - - \code bool R::empty() const; \endcode True if range is empty - - \code R::R( R& r, split ); \endcode Split range \c r into two subranges. -**/ - -//! A range over which to iterate. -/** @ingroup algorithms */ -template<typename Value> -class blocked_range { -public: - //! Type of a value - /** Called a const_iterator for sake of algorithms that need to treat a blocked_range - as an STL container. */ - using const_iterator = Value; - - //! Type for size of a range - using size_type = std::size_t; - - //! Construct range over half-open interval [begin,end), with the given grainsize. - blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : - my_end(end_), my_begin(begin_), my_grainsize(grainsize_) - { - __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" ); - } - - //! Beginning of range. - const_iterator begin() const { return my_begin; } - - //! One past last value in range. - const_iterator end() const { return my_end; } - - //! Size of the range - /** Unspecified if end()<begin(). */ - size_type size() const { - __TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" ); - return size_type(my_end-my_begin); - } - - //! The grain size for this range. - size_type grainsize() const { return my_grainsize; } - - //------------------------------------------------------------------------ - // Methods that implement Range concept - //------------------------------------------------------------------------ - - //! True if range is empty. - bool empty() const { return !(my_begin<my_end); } - - //! True if range is divisible. - /** Unspecified if end()<begin(). */ - bool is_divisible() const { return my_grainsize<size(); } - - //! Split range. - /** The new Range *this has the second part, the old range r has the first part. - Unspecified if end()<begin() or !is_divisible(). */ - blocked_range( blocked_range& r, split ) : - my_end(r.my_end), - my_begin(do_split(r, split())), - my_grainsize(r.my_grainsize) - { - // only comparison 'less than' is required from values of blocked_range objects - __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); - } - - //! Split range. - /** The new Range *this has the second part split according to specified proportion, the old range r has the first part. - Unspecified if end()<begin() or !is_divisible(). */ - blocked_range( blocked_range& r, proportional_split& proportion ) : - my_end(r.my_end), - my_begin(do_split(r, proportion)), - my_grainsize(r.my_grainsize) - { - // only comparison 'less than' is required from values of blocked_range objects - __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); - } - -private: - /** NOTE: my_end MUST be declared before my_begin, otherwise the splitting constructor will break. */ - Value my_end; - Value my_begin; - size_type my_grainsize; - - //! Auxiliary function used by the splitting constructor. 
- static Value do_split( blocked_range& r, split ) - { - __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); - Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u; - r.my_end = middle; - return middle; - } - - static Value do_split( blocked_range& r, proportional_split& proportion ) - { - __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); - - // usage of 32-bit floating point arithmetic is not enough to handle ranges of - // more than 2^24 iterations accurately. However, even on ranges with 2^64 - // iterations the computational error approximately equals to 0.000001% which - // makes small impact on uniform distribution of such range's iterations (assuming - // all iterations take equal time to complete). See 'test_partitioner_whitebox' - // for implementation of an exact split algorithm - size_type right_part = size_type(float(r.size()) * float(proportion.right()) - / float(proportion.left() + proportion.right()) + 0.5f); - return r.my_end = Value(r.my_end - right_part); - } - - template<typename RowValue, typename ColValue> - friend class blocked_range2d; - - template<typename RowValue, typename ColValue, typename PageValue> - friend class blocked_range3d; - - template<typename DimValue, unsigned int N, typename> - friend class blocked_rangeNd_impl; -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::blocked_range; -// Split types -using detail::split; -using detail::proportional_split; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_blocked_range_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range_H +#define __TBB_blocked_range_H + +#include <cstddef> + +#include "detail/_range_common.h" +#include "detail/_namespace_injection.h" + +#include "version.h" + +namespace tbb { +namespace detail { +namespace d1 { + +/** \page range_req Requirements on range concept + Class \c R implementing the concept of range must define: + - \code R::R( const R& ); \endcode Copy constructor + - \code R::~R(); \endcode Destructor + - \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges + - \code bool R::empty() const; \endcode True if range is empty + - \code R::R( R& r, split ); \endcode Split range \c r into two subranges. +**/ + +//! A range over which to iterate. +/** @ingroup algorithms */ +template<typename Value> +class blocked_range { +public: + //! Type of a value + /** Called a const_iterator for sake of algorithms that need to treat a blocked_range + as an STL container. */ + using const_iterator = Value; + + //! Type for size of a range + using size_type = std::size_t; + + //! Construct range over half-open interval [begin,end), with the given grainsize. + blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : + my_end(end_), my_begin(begin_), my_grainsize(grainsize_) + { + __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" ); + } + + //! 
Beginning of range. + const_iterator begin() const { return my_begin; } + + //! One past last value in range. + const_iterator end() const { return my_end; } + + //! Size of the range + /** Unspecified if end()<begin(). */ + size_type size() const { + __TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" ); + return size_type(my_end-my_begin); + } + + //! The grain size for this range. + size_type grainsize() const { return my_grainsize; } + + //------------------------------------------------------------------------ + // Methods that implement Range concept + //------------------------------------------------------------------------ + + //! True if range is empty. + bool empty() const { return !(my_begin<my_end); } + + //! True if range is divisible. + /** Unspecified if end()<begin(). */ + bool is_divisible() const { return my_grainsize<size(); } + + //! Split range. + /** The new Range *this has the second part, the old range r has the first part. + Unspecified if end()<begin() or !is_divisible(). */ + blocked_range( blocked_range& r, split ) : + my_end(r.my_end), + my_begin(do_split(r, split())), + my_grainsize(r.my_grainsize) + { + // only comparison 'less than' is required from values of blocked_range objects + __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); + } + + //! Split range. + /** The new Range *this has the second part split according to specified proportion, the old range r has the first part. + Unspecified if end()<begin() or !is_divisible(). */ + blocked_range( blocked_range& r, proportional_split& proportion ) : + my_end(r.my_end), + my_begin(do_split(r, proportion)), + my_grainsize(r.my_grainsize) + { + // only comparison 'less than' is required from values of blocked_range objects + __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); + } + +private: + /** NOTE: my_end MUST be declared before my_begin, otherwise the splitting constructor will break. */ + Value my_end; + Value my_begin; + size_type my_grainsize; + + //! Auxiliary function used by the splitting constructor. + static Value do_split( blocked_range& r, split ) + { + __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); + Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u; + r.my_end = middle; + return middle; + } + + static Value do_split( blocked_range& r, proportional_split& proportion ) + { + __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); + + // usage of 32-bit floating point arithmetic is not enough to handle ranges of + // more than 2^24 iterations accurately. However, even on ranges with 2^64 + // iterations the computational error approximately equals to 0.000001% which + // makes small impact on uniform distribution of such range's iterations (assuming + // all iterations take equal time to complete). 
See 'test_partitioner_whitebox' + // for implementation of an exact split algorithm + size_type right_part = size_type(float(r.size()) * float(proportion.right()) + / float(proportion.left() + proportion.right()) + 0.5f); + return r.my_end = Value(r.my_end - right_part); + } + + template<typename RowValue, typename ColValue> + friend class blocked_range2d; + + template<typename RowValue, typename ColValue, typename PageValue> + friend class blocked_range3d; + + template<typename DimValue, unsigned int N, typename> + friend class blocked_rangeNd_impl; +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_blocked_range_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h index 01ed17d859..1825285961 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h @@ -1,108 +1,108 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_blocked_range2d_H -#define __TBB_blocked_range2d_H - -#include <cstddef> - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -#include "blocked_range.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! A 2-dimensional range that models the Range concept. -/** @ingroup algorithms */ -template<typename RowValue, typename ColValue = RowValue> -class blocked_range2d { -public: - //! Type for size of an iteration range - using row_range_type = blocked_range<RowValue>; - using col_range_type = blocked_range<ColValue>; - -private: - row_range_type my_rows; - col_range_type my_cols; - -public: - blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, - ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : - my_rows(row_begin,row_end,row_grainsize), - my_cols(col_begin,col_end,col_grainsize) - {} - - blocked_range2d( RowValue row_begin, RowValue row_end, - ColValue col_begin, ColValue col_end ) : - my_rows(row_begin,row_end), - my_cols(col_begin,col_end) - {} - - //! True if range is empty - bool empty() const { - // Range is empty if at least one dimension is empty. - return my_rows.empty() || my_cols.empty(); - } - - //! True if range is divisible into two pieces. - bool is_divisible() const { - return my_rows.is_divisible() || my_cols.is_divisible(); - } - - blocked_range2d( blocked_range2d& r, split ) : - my_rows(r.my_rows), - my_cols(r.my_cols) - { - split split_obj; - do_split(r, split_obj); - } - - blocked_range2d( blocked_range2d& r, proportional_split& proportion ) : - my_rows(r.my_rows), - my_cols(r.my_cols) - { - do_split(r, proportion); - } - - //! The rows of the iteration space - const row_range_type& rows() const { return my_rows; } - - //! 
The columns of the iteration space - const col_range_type& cols() const { return my_cols; } - -private: - template <typename Split> - void do_split( blocked_range2d& r, Split& split_obj ) { - if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); - } - } -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::blocked_range2d; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_blocked_range2d_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range2d_H +#define __TBB_blocked_range2d_H + +#include <cstddef> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A 2-dimensional range that models the Range concept. +/** @ingroup algorithms */ +template<typename RowValue, typename ColValue = RowValue> +class blocked_range2d { +public: + //! Type for size of an iteration range + using row_range_type = blocked_range<RowValue>; + using col_range_type = blocked_range<ColValue>; + +private: + row_range_type my_rows; + col_range_type my_cols; + +public: + blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, + ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : + my_rows(row_begin,row_end,row_grainsize), + my_cols(col_begin,col_end,col_grainsize) + {} + + blocked_range2d( RowValue row_begin, RowValue row_end, + ColValue col_begin, ColValue col_end ) : + my_rows(row_begin,row_end), + my_cols(col_begin,col_end) + {} + + //! True if range is empty + bool empty() const { + // Range is empty if at least one dimension is empty. + return my_rows.empty() || my_cols.empty(); + } + + //! True if range is divisible into two pieces. + bool is_divisible() const { + return my_rows.is_divisible() || my_cols.is_divisible(); + } + + blocked_range2d( blocked_range2d& r, split ) : + my_rows(r.my_rows), + my_cols(r.my_cols) + { + split split_obj; + do_split(r, split_obj); + } + + blocked_range2d( blocked_range2d& r, proportional_split& proportion ) : + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, proportion); + } + + //! The rows of the iteration space + const row_range_type& rows() const { return my_rows; } + + //! 
The columns of the iteration space + const col_range_type& cols() const { return my_cols; } + +private: + template <typename Split> + void do_split( blocked_range2d& r, Split& split_obj ) { + if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); + } + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range2d; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_range2d_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h index d4178050a8..4754fa8d3c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h @@ -1,127 +1,127 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_blocked_range3d_H -#define __TBB_blocked_range3d_H - -#include <cstddef> - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -#include "blocked_range.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! A 3-dimensional range that models the Range concept. -/** @ingroup algorithms */ -template<typename PageValue, typename RowValue = PageValue, typename ColValue = RowValue> -class blocked_range3d { -public: - //! Type for size of an iteration range - using page_range_type = blocked_range<PageValue>; - using row_range_type = blocked_range<RowValue>; - using col_range_type = blocked_range<ColValue>; - -private: - page_range_type my_pages; - row_range_type my_rows; - col_range_type my_cols; - -public: - - blocked_range3d( PageValue page_begin, PageValue page_end, - RowValue row_begin, RowValue row_end, - ColValue col_begin, ColValue col_end ) : - my_pages(page_begin,page_end), - my_rows(row_begin,row_end), - my_cols(col_begin,col_end) - {} - - blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, - RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, - ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : - my_pages(page_begin,page_end,page_grainsize), - my_rows(row_begin,row_end,row_grainsize), - my_cols(col_begin,col_end,col_grainsize) - {} - - //! True if range is empty - bool empty() const { - // Range is empty if at least one dimension is empty. - return my_pages.empty() || my_rows.empty() || my_cols.empty(); - } - - //! True if range is divisible into two pieces. 
- bool is_divisible() const { - return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible(); - } - - blocked_range3d( blocked_range3d& r, split split_obj ) : - my_pages(r.my_pages), - my_rows(r.my_rows), - my_cols(r.my_cols) - { - do_split(r, split_obj); - } - - blocked_range3d( blocked_range3d& r, proportional_split& proportion ) : - my_pages(r.my_pages), - my_rows(r.my_rows), - my_cols(r.my_cols) - { - do_split(r, proportion); - } - - //! The pages of the iteration space - const page_range_type& pages() const { return my_pages; } - - //! The rows of the iteration space - const row_range_type& rows() const { return my_rows; } - - //! The columns of the iteration space - const col_range_type& cols() const { return my_cols; } - -private: - template <typename Split> - void do_split( blocked_range3d& r, Split& split_obj) { - if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) { - if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); - } - } else { - if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) { - my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); - } else { - my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj); - } - } - } -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::blocked_range3d; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_blocked_range3d_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range3d_H +#define __TBB_blocked_range3d_H + +#include <cstddef> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A 3-dimensional range that models the Range concept. +/** @ingroup algorithms */ +template<typename PageValue, typename RowValue = PageValue, typename ColValue = RowValue> +class blocked_range3d { +public: + //! 
Type for size of an iteration range + using page_range_type = blocked_range<PageValue>; + using row_range_type = blocked_range<RowValue>; + using col_range_type = blocked_range<ColValue>; + +private: + page_range_type my_pages; + row_range_type my_rows; + col_range_type my_cols; + +public: + + blocked_range3d( PageValue page_begin, PageValue page_end, + RowValue row_begin, RowValue row_end, + ColValue col_begin, ColValue col_end ) : + my_pages(page_begin,page_end), + my_rows(row_begin,row_end), + my_cols(col_begin,col_end) + {} + + blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, + RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, + ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : + my_pages(page_begin,page_end,page_grainsize), + my_rows(row_begin,row_end,row_grainsize), + my_cols(col_begin,col_end,col_grainsize) + {} + + //! True if range is empty + bool empty() const { + // Range is empty if at least one dimension is empty. + return my_pages.empty() || my_rows.empty() || my_cols.empty(); + } + + //! True if range is divisible into two pieces. + bool is_divisible() const { + return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible(); + } + + blocked_range3d( blocked_range3d& r, split split_obj ) : + my_pages(r.my_pages), + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, split_obj); + } + + blocked_range3d( blocked_range3d& r, proportional_split& proportion ) : + my_pages(r.my_pages), + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, proportion); + } + + //! The pages of the iteration space + const page_range_type& pages() const { return my_pages; } + + //! The rows of the iteration space + const row_range_type& rows() const { return my_rows; } + + //! The columns of the iteration space + const col_range_type& cols() const { return my_cols; } + +private: + template <typename Split> + void do_split( blocked_range3d& r, Split& split_obj) { + if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) { + if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); + } + } else { + if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj); + } + } + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range3d; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_range3d_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h index 37b71da8fe..9b1531a07a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h @@ -1,144 +1,144 @@ -/* - Copyright (c) 2017-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_blocked_rangeNd_H -#define __TBB_blocked_rangeNd_H - -#if !TBB_PREVIEW_BLOCKED_RANGE_ND - #error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h -#endif - -#include <algorithm> // std::any_of -#include <array> -#include <cstddef> -#include <type_traits> // std::is_same, std::enable_if - -#include "detail/_config.h" -#include "detail/_template_helpers.h" // index_sequence, make_index_sequence - -#include "blocked_range.h" - -namespace tbb { -namespace detail { -namespace d1 { - -/* - The blocked_rangeNd_impl uses make_index_sequence<N> to automatically generate a ctor with - exactly N arguments of the type tbb::blocked_range<Value>. Such ctor provides an opportunity - to use braced-init-list parameters to initialize each dimension. - Use of parameters, whose representation is a braced-init-list, but they're not - std::initializer_list or a reference to one, produces a non-deduced context - within template argument deduction. - - NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl - (and not e.g. a derived class), otherwise it would need to declare its own ctor - facing the same problem that the impl class solves. -*/ - -template<typename Value, unsigned int N, typename = detail::make_index_sequence<N>> -class blocked_rangeNd_impl; - -template<typename Value, unsigned int N, std::size_t... Is> -class blocked_rangeNd_impl<Value, N, detail::index_sequence<Is...>> { -public: - //! Type of a value. - using value_type = Value; - -private: - //! Helper type to construct range with N tbb::blocked_range<value_type> objects. - template<std::size_t> - using dim_type_helper = tbb::blocked_range<value_type>; - -public: - blocked_rangeNd_impl() = delete; - - //! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range<Value>. - blocked_rangeNd_impl(const dim_type_helper<Is>&... args) : my_dims{ {args...} } {} - - //! Dimensionality of a range. - static constexpr unsigned int ndims() { return N; } - - //! Range in certain dimension. - const tbb::blocked_range<value_type>& dim(unsigned int dimension) const { - __TBB_ASSERT(dimension < N, "out of bound"); - return my_dims[dimension]; - } - - //------------------------------------------------------------------------ - // Methods that implement Range concept - //------------------------------------------------------------------------ - - //! True if at least one dimension is empty. - bool empty() const { - return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { - return d.empty(); - }); - } - - //! True if at least one dimension is divisible. 
- bool is_divisible() const { - return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { - return d.is_divisible(); - }); - } - - blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) { - do_split(r, proportion); - } - - blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) { - do_split(r, proportion); - } - -private: - static_assert(N != 0, "zero dimensional blocked_rangeNd can't be constructed"); - - //! Ranges in each dimension. - std::array<tbb::blocked_range<value_type>, N> my_dims; - - template<typename split_type> - void do_split(blocked_rangeNd_impl& r, split_type proportion) { - static_assert((std::is_same<split_type, split>::value || std::is_same<split_type, proportional_split>::value), "type of split object is incorrect"); - __TBB_ASSERT(r.is_divisible(), "can't split not divisible range"); - - auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& first, const tbb::blocked_range<value_type>& second) { - return (first.size() * second.grainsize() < second.size() * first.grainsize()); - }); - - auto r_it = r.my_dims.begin() + (my_it - my_dims.begin()); - - my_it->my_begin = tbb::blocked_range<value_type>::do_split(*r_it, proportion); - - // (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to - // (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept - __TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin), - "blocked_range has been split incorrectly"); - } -}; - -template<typename Value, unsigned int N> -using blocked_rangeNd = blocked_rangeNd_impl<Value, N>; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::blocked_rangeNd; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_blocked_rangeNd_H */ - +/* + Copyright (c) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_rangeNd_H +#define __TBB_blocked_rangeNd_H + +#if !TBB_PREVIEW_BLOCKED_RANGE_ND + #error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h +#endif + +#include <algorithm> // std::any_of +#include <array> +#include <cstddef> +#include <type_traits> // std::is_same, std::enable_if + +#include "detail/_config.h" +#include "detail/_template_helpers.h" // index_sequence, make_index_sequence + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +/* + The blocked_rangeNd_impl uses make_index_sequence<N> to automatically generate a ctor with + exactly N arguments of the type tbb::blocked_range<Value>. Such ctor provides an opportunity + to use braced-init-list parameters to initialize each dimension. + Use of parameters, whose representation is a braced-init-list, but they're not + std::initializer_list or a reference to one, produces a non-deduced context + within template argument deduction. 
+ + NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl + (and not e.g. a derived class), otherwise it would need to declare its own ctor + facing the same problem that the impl class solves. +*/ + +template<typename Value, unsigned int N, typename = detail::make_index_sequence<N>> +class blocked_rangeNd_impl; + +template<typename Value, unsigned int N, std::size_t... Is> +class blocked_rangeNd_impl<Value, N, detail::index_sequence<Is...>> { +public: + //! Type of a value. + using value_type = Value; + +private: + //! Helper type to construct range with N tbb::blocked_range<value_type> objects. + template<std::size_t> + using dim_type_helper = tbb::blocked_range<value_type>; + +public: + blocked_rangeNd_impl() = delete; + + //! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range<Value>. + blocked_rangeNd_impl(const dim_type_helper<Is>&... args) : my_dims{ {args...} } {} + + //! Dimensionality of a range. + static constexpr unsigned int ndims() { return N; } + + //! Range in certain dimension. + const tbb::blocked_range<value_type>& dim(unsigned int dimension) const { + __TBB_ASSERT(dimension < N, "out of bound"); + return my_dims[dimension]; + } + + //------------------------------------------------------------------------ + // Methods that implement Range concept + //------------------------------------------------------------------------ + + //! True if at least one dimension is empty. + bool empty() const { + return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { + return d.empty(); + }); + } + + //! True if at least one dimension is divisible. + bool is_divisible() const { + return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { + return d.is_divisible(); + }); + } + + blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) { + do_split(r, proportion); + } + + blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) { + do_split(r, proportion); + } + +private: + static_assert(N != 0, "zero dimensional blocked_rangeNd can't be constructed"); + + //! Ranges in each dimension. 
+ std::array<tbb::blocked_range<value_type>, N> my_dims; + + template<typename split_type> + void do_split(blocked_rangeNd_impl& r, split_type proportion) { + static_assert((std::is_same<split_type, split>::value || std::is_same<split_type, proportional_split>::value), "type of split object is incorrect"); + __TBB_ASSERT(r.is_divisible(), "can't split not divisible range"); + + auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& first, const tbb::blocked_range<value_type>& second) { + return (first.size() * second.grainsize() < second.size() * first.grainsize()); + }); + + auto r_it = r.my_dims.begin() + (my_it - my_dims.begin()); + + my_it->my_begin = tbb::blocked_range<value_type>::do_split(*r_it, proportion); + + // (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to + // (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept + __TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin), + "blocked_range has been split incorrectly"); + } +}; + +template<typename Value, unsigned int N> +using blocked_rangeNd = blocked_rangeNd_impl<Value, N>; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_rangeNd; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_rangeNd_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h index 645f3fbd2e..dbc4ec1c13 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h @@ -1,189 +1,189 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_cache_aligned_allocator_H -#define __TBB_cache_aligned_allocator_H - -#include "detail/_utils.h" -#include "detail/_namespace_injection.h" -#include <cstdlib> -#include <utility> - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -#error #include <memory_resource> -#endif - -namespace tbb { -namespace detail { - -namespace r1 { -void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); -void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); -std::size_t __TBB_EXPORTED_FUNC cache_line_size(); -} - -namespace d1 { - -template<typename T> -class cache_aligned_allocator { -public: - using value_type = T; - using propagate_on_container_move_assignment = std::true_type; - - //! Always defined for TBB containers (supported since C++17 for std containers) - using is_always_equal = std::true_type; - - cache_aligned_allocator() = default; - template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) noexcept {} - - //! Allocate space for n objects, starting on a cache/sector line. - __TBB_nodiscard T* allocate(std::size_t n) { - return static_cast<T*>(r1::cache_aligned_allocate(n * sizeof(value_type))); - } - - //! 
Free block of memory that starts on a cache line - void deallocate(T* p, std::size_t) { - r1::cache_aligned_deallocate(p); - } - - //! Largest value for which method allocate might succeed. - std::size_t max_size() const noexcept { - return (~std::size_t(0) - r1::cache_line_size()) / sizeof(value_type); - } - -#if TBB_ALLOCATOR_TRAITS_BROKEN - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using difference_type = std::ptrdiff_t; - using size_type = std::size_t; - template<typename U> struct rebind { - using other = cache_aligned_allocator<U>; - }; - template<typename U, typename... Args> - void construct(U *p, Args&&... args) - { ::new (p) U(std::forward<Args>(args)...); } - void destroy(pointer p) { p->~value_type(); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } -#endif // TBB_ALLOCATOR_TRAITS_BROKEN -}; - -#if TBB_ALLOCATOR_TRAITS_BROKEN - template<> - class cache_aligned_allocator<void> { - public: - using pointer = void*; - using const_pointer = const void*; - using value_type = void; - template<typename U> struct rebind { - using other = cache_aligned_allocator<U>; - }; - }; -#endif - -template<typename T, typename U> -bool operator==(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return true; } - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template<typename T, typename U> -bool operator!=(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return false; } -#endif - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT - -//! C++17 memory resource wrapper to ensure cache line size alignment -class cache_aligned_resource : public std::pmr::memory_resource { -public: - cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {} - explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {} - - std::pmr::memory_resource* upstream_resource() const { - return m_upstream; - } - -private: - //! We don't know what memory resource set. Use padding to guarantee alignment - void* do_allocate(std::size_t bytes, std::size_t alignment) override { - // TODO: make it common with tbb_allocator.cpp - std::size_t cache_line_alignment = correct_alignment(alignment); - std::size_t space = correct_size(bytes) + cache_line_alignment; - std::uintptr_t base = reinterpret_cast<std::uintptr_t>(m_upstream->allocate(space)); - __TBB_ASSERT(base != 0, "Upstream resource returned NULL."); - - // Round up to the next cache line (align the base address) - std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1); - __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Can`t store a base pointer to the header"); - __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); - - // Record where block actually starts. 
- (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; - return reinterpret_cast<void*>(result); - } - - void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) override { - if (ptr) { - // Recover where block actually starts - std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(ptr))[-1]; - m_upstream->deallocate(reinterpret_cast<void*>(base), correct_size(bytes) + correct_alignment(alignment)); - } - } - - bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { - if (this == &other) { return true; } -#if __TBB_USE_OPTIONAL_RTTI - const cache_aligned_resource* other_res = dynamic_cast<const cache_aligned_resource*>(&other); - return other_res && (upstream_resource() == other_res->upstream_resource()); -#else - return false; -#endif - } - - std::size_t correct_alignment(std::size_t alignment) { - __TBB_ASSERT(tbb::detail::is_power_of_two(alignment), "Alignment is not a power of 2"); -#if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT - std::size_t cache_line_size = std::hardware_destructive_interference_size; -#else - std::size_t cache_line_size = r1::cache_line_size(); -#endif - return alignment < cache_line_size ? cache_line_size : alignment; - } - - std::size_t correct_size(std::size_t bytes) { - // To handle the case, when small size requested. There could be not - // enough space to store the original pointer. - return bytes < sizeof(std::uintptr_t) ? sizeof(std::uintptr_t) : bytes; - } - - std::pmr::memory_resource* m_upstream; -}; - -#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::cache_aligned_allocator; -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -using detail::d1::cache_aligned_resource; -#endif -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_cache_aligned_allocator_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_cache_aligned_allocator_H +#define __TBB_cache_aligned_allocator_H + +#include "detail/_utils.h" +#include "detail/_namespace_injection.h" +#include <cstdlib> +#include <utility> + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); +std::size_t __TBB_EXPORTED_FUNC cache_line_size(); +} + +namespace d1 { + +template<typename T> +class cache_aligned_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers (supported since C++17 for std containers) + using is_always_equal = std::true_type; + + cache_aligned_allocator() = default; + template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) noexcept {} + + //! Allocate space for n objects, starting on a cache/sector line. 
+ __TBB_nodiscard T* allocate(std::size_t n) { + return static_cast<T*>(r1::cache_aligned_allocate(n * sizeof(value_type))); + } + + //! Free block of memory that starts on a cache line + void deallocate(T* p, std::size_t) { + r1::cache_aligned_deallocate(p); + } + + //! Largest value for which method allocate might succeed. + std::size_t max_size() const noexcept { + return (~std::size_t(0) - r1::cache_line_size()) / sizeof(value_type); + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = cache_aligned_allocator<U>; + }; + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new (p) U(std::forward<Args>(args)...); } + void destroy(pointer p) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class cache_aligned_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = cache_aligned_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +bool operator==(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +bool operator!=(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return false; } +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +//! C++17 memory resource wrapper to ensure cache line size alignment +class cache_aligned_resource : public std::pmr::memory_resource { +public: + cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {} + explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {} + + std::pmr::memory_resource* upstream_resource() const { + return m_upstream; + } + +private: + //! We don't know what memory resource set. Use padding to guarantee alignment + void* do_allocate(std::size_t bytes, std::size_t alignment) override { + // TODO: make it common with tbb_allocator.cpp + std::size_t cache_line_alignment = correct_alignment(alignment); + std::size_t space = correct_size(bytes) + cache_line_alignment; + std::uintptr_t base = reinterpret_cast<std::uintptr_t>(m_upstream->allocate(space)); + __TBB_ASSERT(base != 0, "Upstream resource returned NULL."); + + // Round up to the next cache line (align the base address) + std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1); + __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Can`t store a base pointer to the header"); + __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); + + // Record where block actually starts. 
+ (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; + return reinterpret_cast<void*>(result); + } + + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) override { + if (ptr) { + // Recover where block actually starts + std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(ptr))[-1]; + m_upstream->deallocate(reinterpret_cast<void*>(base), correct_size(bytes) + correct_alignment(alignment)); + } + } + + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + if (this == &other) { return true; } +#if __TBB_USE_OPTIONAL_RTTI + const cache_aligned_resource* other_res = dynamic_cast<const cache_aligned_resource*>(&other); + return other_res && (upstream_resource() == other_res->upstream_resource()); +#else + return false; +#endif + } + + std::size_t correct_alignment(std::size_t alignment) { + __TBB_ASSERT(tbb::detail::is_power_of_two(alignment), "Alignment is not a power of 2"); +#if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT + std::size_t cache_line_size = std::hardware_destructive_interference_size; +#else + std::size_t cache_line_size = r1::cache_line_size(); +#endif + return alignment < cache_line_size ? cache_line_size : alignment; + } + + std::size_t correct_size(std::size_t bytes) { + // To handle the case, when small size requested. There could be not + // enough space to store the original pointer. + return bytes < sizeof(std::uintptr_t) ? sizeof(std::uintptr_t) : bytes; + } + + std::pmr::memory_resource* m_upstream; +}; + +#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::cache_aligned_allocator; +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +using detail::d1::cache_aligned_resource; +#endif +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_cache_aligned_allocator_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/combinable.h b/contrib/libs/tbb/include/oneapi/tbb/combinable.h index b676a30cc0..e211c970aa 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/combinable.h +++ b/contrib/libs/tbb/include/oneapi/tbb/combinable.h @@ -1,69 +1,69 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_combinable_H -#define __TBB_combinable_H - -#include "detail/_namespace_injection.h" - -#include "enumerable_thread_specific.h" -#include "cache_aligned_allocator.h" - -namespace tbb { -namespace detail { -namespace d1 { -/** \name combinable **/ -//@{ -//! 
Thread-local storage with optional reduction -/** @ingroup containers */ -template <typename T> -class combinable { - using my_alloc = typename tbb::cache_aligned_allocator<T>; - using my_ets_type = typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key>; - my_ets_type my_ets; - -public: - combinable() = default; - - template <typename Finit> - explicit combinable(Finit _finit) : my_ets(_finit) { } - - void clear() { my_ets.clear(); } - - T& local() { return my_ets.local(); } - - T& local(bool& exists) { return my_ets.local(exists); } - - // combine_func_t has signature T(T,T) or T(const T&, const T&) - template <typename CombineFunc> - T combine(CombineFunc f_combine) { return my_ets.combine(f_combine); } - - // combine_func_t has signature void(T) or void(const T&) - template <typename CombineFunc> - void combine_each(CombineFunc f_combine) { my_ets.combine_each(f_combine); } -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::combinable; -} // inline namespace v1 - -} // namespace tbb - -#endif /* __TBB_combinable_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_combinable_H +#define __TBB_combinable_H + +#include "detail/_namespace_injection.h" + +#include "enumerable_thread_specific.h" +#include "cache_aligned_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { +/** \name combinable **/ +//@{ +//! Thread-local storage with optional reduction +/** @ingroup containers */ +template <typename T> +class combinable { + using my_alloc = typename tbb::cache_aligned_allocator<T>; + using my_ets_type = typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key>; + my_ets_type my_ets; + +public: + combinable() = default; + + template <typename Finit> + explicit combinable(Finit _finit) : my_ets(_finit) { } + + void clear() { my_ets.clear(); } + + T& local() { return my_ets.local(); } + + T& local(bool& exists) { return my_ets.local(exists); } + + // combine_func_t has signature T(T,T) or T(const T&, const T&) + template <typename CombineFunc> + T combine(CombineFunc f_combine) { return my_ets.combine(f_combine); } + + // combine_func_t has signature void(T) or void(const T&) + template <typename CombineFunc> + void combine_each(CombineFunc f_combine) { my_ets.combine_each(f_combine); } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::combinable; +} // inline namespace v1 + +} // namespace tbb + +#endif /* __TBB_combinable_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h index 510557e9f2..1019e2fd3c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h @@ -1,1524 +1,1524 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_concurrent_hash_map_H -#define __TBB_concurrent_hash_map_H - -#include "detail/_namespace_injection.h" -#include "detail/_utils.h" -#include "detail/_assert.h" -#include "detail/_allocator_traits.h" -#include "detail/_containers_helpers.h" -#include "detail/_template_helpers.h" -#include "detail/_hash_compare.h" -#include "detail/_range_common.h" -#include "tbb_allocator.h" -#include "spin_rw_mutex.h" - -#include <atomic> -#include <initializer_list> -#include <tuple> -#include <iterator> -#include <utility> // Need std::pair -#include <cstring> // Need std::memset - -namespace tbb { -namespace detail { -namespace d1 { - -struct hash_map_node_base : no_copy { - using mutex_type = spin_rw_mutex; - // Scoped lock type for mutex - using scoped_type = mutex_type::scoped_lock; - // Next node in chain - hash_map_node_base* next; - mutex_type mutex; -}; - -// Incompleteness flag value -static hash_map_node_base* const rehash_req = reinterpret_cast<hash_map_node_base*>(std::size_t(3)); -// Rehashed empty bucket flag -static hash_map_node_base* const empty_rehashed = reinterpret_cast<hash_map_node_base*>(std::size_t(0)); - -// base class of concurrent_hash_map - -template <typename Allocator> -class hash_map_base { -public: - using size_type = std::size_t; - using hashcode_type = std::size_t; - using segment_index_type = std::size_t; - using node_base = hash_map_node_base; - - struct bucket : no_copy { - using mutex_type = spin_rw_mutex; - using scoped_type = mutex_type::scoped_lock; - - bucket() : node_list(nullptr) {} - bucket( node_base* ptr ) : node_list(ptr) {} - - mutex_type mutex; - std::atomic<node_base*> node_list; - }; - - using allocator_type = Allocator; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - using bucket_allocator_type = typename allocator_traits_type::template rebind_alloc<bucket>; - using bucket_allocator_traits = tbb::detail::allocator_traits<bucket_allocator_type>; - - // Count of segments in the first block - static constexpr size_type embedded_block = 1; - // Count of segments in the first block - static constexpr size_type embedded_buckets = 1 << embedded_block; - // Count of segments in the first block - static constexpr size_type first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096 - // Size of a pointer / table size - static constexpr size_type pointers_per_table = sizeof(segment_index_type) * 8; // one segment per bit - - using segment_ptr_type = bucket*; - using atomic_segment_type = std::atomic<segment_ptr_type>; - using segments_table_type = atomic_segment_type[pointers_per_table]; - - hash_map_base( const allocator_type& alloc ) : my_allocator(alloc), my_mask(embedded_buckets - 1), my_size(0) { - for (size_type i = 0; i != embedded_buckets; ++i) { - my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); - } - - for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) { - auto argument = segment_index < embedded_block ? 
my_embedded_segment + segment_base(segment_index) : nullptr; - my_table[segment_index].store(argument, std::memory_order_relaxed); - } - - __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); - } - - // segment index of given index in the array - static segment_index_type segment_index_of( size_type index ) { - return segment_index_type(tbb::detail::log2( index|1 )); - } - - // the first array index of given segment - static segment_index_type segment_base( segment_index_type k ) { - return (segment_index_type(1) << k & ~segment_index_type(1)); - } - - // segment size except for k == 0 - static size_type segment_size( segment_index_type k ) { - return size_type(1) << k; // fake value for k==0 - } - - // true if ptr is valid pointer - static bool is_valid( void* ptr ) { - return reinterpret_cast<uintptr_t>(ptr) > uintptr_t(63); - } - - template <typename... Args> - void init_buckets_impl( segment_ptr_type ptr, size_type sz, Args&&... args ) { - for (size_type i = 0; i < sz; ++i) { - bucket_allocator_traits::construct(my_allocator, ptr + i, std::forward<Args>(args)...); - } - } - - // Initialize buckets - void init_buckets( segment_ptr_type ptr, size_type sz, bool is_initial ) { - if (is_initial) { - init_buckets_impl(ptr, sz); - } else { - init_buckets_impl(ptr, sz, reinterpret_cast<node_base*>(rehash_req)); - } - } - - // Add node n to bucket b - static void add_to_bucket( bucket* b, node_base* n ) { - __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); - n->next = b->node_list.load(std::memory_order_relaxed); - b->node_list.store(n, std::memory_order_relaxed); // its under lock and flag is set - } - - const bucket_allocator_type& get_allocator() const { - return my_allocator; - } - - bucket_allocator_type& get_allocator() { - return my_allocator; - } - - // Enable segment - void enable_segment( segment_index_type k, bool is_initial = false ) { - __TBB_ASSERT( k, "Zero segment must be embedded" ); - size_type sz; - __TBB_ASSERT( !is_valid(my_table[k].load(std::memory_order_relaxed)), "Wrong concurrent assignment"); - if (k >= first_block) { - sz = segment_size(k); - segment_ptr_type ptr = nullptr; - try_call( [&] { - ptr = bucket_allocator_traits::allocate(my_allocator, sz); - } ).on_exception( [&] { - my_table[k].store(nullptr, std::memory_order_relaxed); - }); - - __TBB_ASSERT(ptr, nullptr); - init_buckets(ptr, sz, is_initial); - my_table[k].store(ptr, std::memory_order_release); - sz <<= 1;// double it to get entire capacity of the container - } else { // the first block - __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); - sz = segment_size(first_block); - segment_ptr_type ptr = nullptr; - try_call( [&] { - ptr = bucket_allocator_traits::allocate(my_allocator, sz - embedded_buckets); - } ).on_exception( [&] { - my_table[k].store(nullptr, std::memory_order_relaxed); - }); - - __TBB_ASSERT(ptr, nullptr); - init_buckets(ptr, sz - embedded_buckets, is_initial); - ptr -= segment_base(embedded_block); - for(segment_index_type i = embedded_block; i < first_block; i++) // calc the offsets - my_table[i].store(ptr + segment_base(i), std::memory_order_release); - } - my_mask.store(sz-1, std::memory_order_release); - } - - void delete_segment( segment_index_type s ) { - segment_ptr_type buckets_ptr = my_table[s].load(std::memory_order_relaxed); - size_type sz = segment_size( s ? 
s : 1 ); - - size_type deallocate_size = 0; - - if (s >= first_block) { // the first segment or the next - deallocate_size = sz; - } else if (s == embedded_block && embedded_block != first_block) { - deallocate_size = segment_size(first_block) - embedded_buckets; - } - - for (size_type i = 0; i < deallocate_size; ++i) { - bucket_allocator_traits::destroy(my_allocator, buckets_ptr + i); - } - if (deallocate_size != 0) { - bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, deallocate_size); - } - - if (s >= embedded_block) my_table[s].store(nullptr, std::memory_order_relaxed); - } - - // Get bucket by (masked) hashcode - bucket *get_bucket( hashcode_type h ) const noexcept { - segment_index_type s = segment_index_of( h ); - h -= segment_base(s); - segment_ptr_type seg = my_table[s].load(std::memory_order_acquire); - __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); - return &seg[h]; - } - - // detail serial rehashing helper - void mark_rehashed_levels( hashcode_type h ) noexcept { - segment_index_type s = segment_index_of( h ); - while (segment_ptr_type seg = my_table[++s].load(std::memory_order_relaxed)) - if( seg[h].node_list.load(std::memory_order_relaxed) == rehash_req ) { - seg[h].node_list.store(empty_rehashed, std::memory_order_relaxed); - mark_rehashed_levels( h + ((hashcode_type)1<<s) ); // optimized segment_base(s) - } - } - - // Check for mask race - // Splitting into two functions should help inlining - inline bool check_mask_race( const hashcode_type h, hashcode_type &m ) const { - hashcode_type m_now, m_old = m; - m_now = my_mask.load(std::memory_order_acquire); - if (m_old != m_now) { - return check_rehashing_collision(h, m_old, m = m_now); - } - return false; - } - - // Process mask race, check for rehashing collision - bool check_rehashing_collision( const hashcode_type h, hashcode_type m_old, hashcode_type m ) const { - __TBB_ASSERT(m_old != m, nullptr); // TODO?: m arg could be optimized out by passing h = h&m - if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event - // condition above proves that 'h' has some other bits set beside 'm_old' - // find next applicable mask after m_old //TODO: look at bsl instruction - for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size - ; - m_old = (m_old<<1) - 1; // get full mask from a bit - __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, nullptr); - // check whether it is rehashing/ed - if( get_bucket(h & m_old)->node_list.load(std::memory_order_acquire) != rehash_req ) { - return true; - } - } - return false; - } - - // Insert a node and check for load factor. @return segment index to enable. 
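    // Editorial note (not from the upstream header) on the function below: it returns a non-zero
    // segment index only when the load-factor check fires (incremented size >= mask) and this
    // thread wins the compare_exchange on the next segment slot; the caller then calls
    // enable_segment() after releasing the bucket lock, which grows the bucket count
    // (doubling it once the first block has been allocated).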
- segment_index_type insert_new_node( bucket *b, node_base *n, hashcode_type mask ) { - size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted - add_to_bucket( b, n ); - // check load factor - if( sz >= mask ) { // TODO: add custom load_factor - segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of - __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated"); - static const segment_ptr_type is_allocating = segment_ptr_type(2);; - segment_ptr_type disabled = nullptr; - if (!(my_table[new_seg].load(std::memory_order_acquire)) - && my_table[new_seg].compare_exchange_strong(disabled, is_allocating)) - return new_seg; // The value must be processed - } - return 0; - } - - // Prepare enough segments for number of buckets - void reserve(size_type buckets) { - if( !buckets-- ) return; - bool is_initial = !my_size.load(std::memory_order_relaxed); - for (size_type m = my_mask.load(std::memory_order_relaxed); buckets > m; - m = my_mask.load(std::memory_order_relaxed)) - { - enable_segment( segment_index_of( m+1 ), is_initial ); - } - } - - // Swap hash_map_bases - void internal_swap_content(hash_map_base &table) { - using std::swap; - swap_atomics_relaxed(my_mask, table.my_mask); - swap_atomics_relaxed(my_size, table.my_size); - - for(size_type i = 0; i < embedded_buckets; i++) { - auto temp = my_embedded_segment[i].node_list.load(std::memory_order_relaxed); - my_embedded_segment[i].node_list.store(table.my_embedded_segment[i].node_list.load(std::memory_order_relaxed), - std::memory_order_relaxed); - table.my_embedded_segment[i].node_list.store(temp, std::memory_order_relaxed); - } - for(size_type i = embedded_block; i < pointers_per_table; i++) { - auto temp = my_table[i].load(std::memory_order_relaxed); - my_table[i].store(table.my_table[i].load(std::memory_order_relaxed), - std::memory_order_relaxed); - table.my_table[i].store(temp, std::memory_order_relaxed); - } - } - - void internal_move(hash_map_base&& other) { - my_mask.store(other.my_mask.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_mask.store(embedded_buckets - 1, std::memory_order_relaxed); - - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_size.store(0, std::memory_order_relaxed); - - for (size_type i = 0; i < embedded_buckets; ++i) { - my_embedded_segment[i].node_list.store(other.my_embedded_segment[i].node_list, std::memory_order_relaxed); - other.my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); - } - - for (size_type i = embedded_block; i < pointers_per_table; ++i) { - my_table[i].store(other.my_table[i].load(std::memory_order_relaxed), - std::memory_order_relaxed); - other.my_table[i].store(nullptr, std::memory_order_relaxed); - } - } - -protected: - - bucket_allocator_type my_allocator; - // Hash mask = sum of allocated segment sizes - 1 - std::atomic<hashcode_type> my_mask; - // Size of container in stored items - std::atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects - // Zero segment - bucket my_embedded_segment[embedded_buckets]; - // Segment pointers table. 
Also prevents false sharing between my_mask and my_size - segments_table_type my_table; -}; - -template <typename Iterator> -class hash_map_range; - -// Meets requirements of a forward iterator for STL -// Value is either the T or const T type of the container. -template <typename Container, typename Value> -class hash_map_iterator { - using map_type = Container; - using node = typename Container::node; - using map_base = typename Container::base_type; - using node_base = typename map_base::node_base; - using bucket = typename map_base::bucket; -public: - using value_type = Value; - using size_type = typename Container::size_type; - using difference_type = typename Container::difference_type; - using pointer = value_type*; - using reference = value_type&; - using iterator_category = std::forward_iterator_tag; - - // Construct undefined iterator - hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {} - hash_map_iterator( const hash_map_iterator<Container, typename Container::value_type>& other ) : - my_map(other.my_map), - my_index(other.my_index), - my_bucket(other.my_bucket), - my_node(other.my_node) - {} - - hash_map_iterator& operator=( const hash_map_iterator<Container, typename Container::value_type>& other ) { - my_map = other.my_map; - my_index = other.my_index; - my_bucket = other.my_bucket; - my_node = other.my_node; - return *this; - } - - Value& operator*() const { - __TBB_ASSERT( map_base::is_valid(my_node), "iterator uninitialized or at end of container?" ); - return my_node->value(); - } - - Value* operator->() const {return &operator*();} - - hash_map_iterator& operator++() { - my_node = static_cast<node*>( my_node->next ); - if( !my_node ) advance_to_next_bucket(); - return *this; - } - - // Post increment - hash_map_iterator operator++(int) { - hash_map_iterator old(*this); - operator++(); - return old; - } -private: - template <typename C, typename T, typename U> - friend bool operator==( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); - - template <typename C, typename T, typename U> - friend bool operator!=( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); - - template <typename C, typename T, typename U> - friend ptrdiff_t operator-( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); - - template <typename C, typename U> - friend class hash_map_iterator; - - template <typename I> - friend class hash_map_range; - - void advance_to_next_bucket() { // TODO?: refactor to iterator_base class - size_t k = my_index+1; - __TBB_ASSERT( my_bucket, "advancing an invalid iterator?"); - while (k <= my_map->my_mask.load(std::memory_order_relaxed)) { - // Following test uses 2's-complement wizardry - if( k&(k-2) ) // not the beginning of a segment - ++my_bucket; - else my_bucket = my_map->get_bucket( k ); - my_node = static_cast<node*>( my_bucket->node_list.load(std::memory_order_relaxed) ); - if( map_base::is_valid(my_node) ) { - my_index = k; return; - } - ++k; - } - my_bucket = 0; my_node = 0; my_index = k; // the end - } - - template <typename Key, typename T, typename HashCompare, typename A> - friend class concurrent_hash_map; - - hash_map_iterator( const Container &map, std::size_t index, const bucket *b, node_base *n ) : - my_map(&map), my_index(index), my_bucket(b), my_node(static_cast<node*>(n)) - { - if( b && !map_base::is_valid(n) ) - advance_to_next_bucket(); - } - - // concurrent_hash_map over which we are iterating. 
- const Container *my_map; - // Index in hash table for current item - size_t my_index; - // Pointer to bucket - const bucket* my_bucket; - // Pointer to node that has current item - node* my_node; -}; - -template <typename Container, typename T, typename U> -bool operator==( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { - return i.my_node == j.my_node && i.my_map == j.my_map; -} - -template <typename Container, typename T, typename U> -bool operator!=( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { - return i.my_node != j.my_node || i.my_map != j.my_map; -} - -// Range class used with concurrent_hash_map -template <typename Iterator> -class hash_map_range { - using map_type = typename Iterator::map_type; -public: - // Type for size of a range - using size_type = std::size_t; - using value_type = typename Iterator::value_type; - using reference = typename Iterator::reference; - using difference_type = typename Iterator::difference_type; - using iterator = Iterator; - - // True if range is empty. - bool empty() const {return my_begin == my_end;} - - // True if range can be partitioned into two subranges. - bool is_divisible() const { - return my_midpoint != my_end; - } - - // Split range. - hash_map_range( hash_map_range& r, split ) : - my_end(r.my_end), - my_grainsize(r.my_grainsize) - { - r.my_end = my_begin = r.my_midpoint; - __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); - __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); - set_midpoint(); - r.set_midpoint(); - } - - // Init range with container and grainsize specified - hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : - my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list.load(std::memory_order_relaxed) ) ), - my_end( Iterator( map, map.my_mask.load(std::memory_order_relaxed) + 1, 0, 0 ) ), - my_grainsize( grainsize_ ) - { - __TBB_ASSERT( grainsize_>0, "grainsize must be positive" ); - set_midpoint(); - } - - const Iterator begin() const { return my_begin; } - const Iterator end() const { return my_end; } - // The grain size for this range. - size_type grainsize() const { return my_grainsize; } - -private: - Iterator my_begin; - Iterator my_end; - mutable Iterator my_midpoint; - size_t my_grainsize; - // Set my_midpoint to point approximately half way between my_begin and my_end. 
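    // Editorial note (not from the upstream header): the split is by bucket index, not by element
    // count, so each half of a divided hash_map_range covers roughly half of the remaining
    // buckets, and grainsize is likewise measured in buckets.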
- void set_midpoint() const; - template <typename U> friend class hash_map_range; -}; - -template <typename Iterator> -void hash_map_range<Iterator>::set_midpoint() const { - // Split by groups of nodes - size_t m = my_end.my_index-my_begin.my_index; - if( m > my_grainsize ) { - m = my_begin.my_index + m/2u; - auto b = my_begin.my_map->get_bucket(m); - my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list.load(std::memory_order_relaxed)); - } else { - my_midpoint = my_end; - } - __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index, - "my_begin is after my_midpoint" ); - __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index, - "my_midpoint is after my_end" ); - __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end, - "[my_begin, my_midpoint) range should not be empty" ); -} - -template <typename Key, typename T, - typename HashCompare = tbb_hash_compare<Key>, - typename Allocator = tbb_allocator<std::pair<const Key, T>>> -class concurrent_hash_map : protected hash_map_base<Allocator> { - template <typename Container, typename Value> - friend class hash_map_iterator; - - template <typename I> - friend class hash_map_range; - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; -public: - using base_type = hash_map_base<Allocator>; - using key_type = Key; - using mapped_type = T; - // type_identity is needed to disable implicit deduction guides for std::initializer_list constructors - // and copy/move constructor with explicit allocator argument - using allocator_type = tbb::detail::type_identity_t<Allocator>; - using hash_compare_type = tbb::detail::type_identity_t<HashCompare>; - using value_type = std::pair<const Key, T>; - using size_type = typename base_type::size_type; - using difference_type = std::ptrdiff_t; - - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using reference = value_type&; - using const_reference = const value_type&; - using iterator = hash_map_iterator<concurrent_hash_map, value_type>; - using const_iterator = hash_map_iterator<concurrent_hash_map, const value_type>; - using range_type = hash_map_range<iterator>; - using const_range_type = hash_map_range<const_iterator>; - -protected: - static_assert(std::is_same<value_type, typename Allocator::value_type>::value, - "value_type of the container must be the same as its allocator's"); - - friend class const_accessor; - class node; - using segment_index_type = typename base_type::segment_index_type; - using segment_ptr_type = typename base_type::segment_ptr_type; - using node_base = typename base_type::node_base; - using bucket = typename base_type::bucket; - using hashcode_type = typename base_type::hashcode_type; - using bucket_allocator_type = typename base_type::bucket_allocator_type; - using node_allocator_type = typename base_type::allocator_traits_type::template rebind_alloc<node>; - using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; - hash_compare_type my_hash_compare; - - class node : public node_base { - public: - node() {} - ~node() {} - pointer storage() { return &my_value; } - value_type& value() { return *storage(); } - private: - union { - value_type my_value; - }; - }; - - void delete_node( node_base *n ) { - node_allocator_type node_allocator(this->get_allocator()); - node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)->storage()); - node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)); - 
node_allocator_traits::deallocate(node_allocator, static_cast<node*>(n), 1); - } - - template <typename... Args> - static node* create_node(bucket_allocator_type& allocator, Args&&... args) { - node_allocator_type node_allocator(allocator); - node* node_ptr = node_allocator_traits::allocate(node_allocator, 1); - auto guard = make_raii_guard([&] { - node_allocator_traits::destroy(node_allocator, node_ptr); - node_allocator_traits::deallocate(node_allocator, node_ptr, 1); - }); - - node_allocator_traits::construct(node_allocator, node_ptr); - node_allocator_traits::construct(node_allocator, node_ptr->storage(), std::forward<Args>(args)...); - guard.dismiss(); - return node_ptr; - } - - static node* allocate_node_copy_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ - return create_node(allocator, key, *t); - } - - static node* allocate_node_move_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ - return create_node(allocator, key, std::move(*const_cast<T*>(t))); - } - - static node* allocate_node_default_construct(bucket_allocator_type& allocator, const Key &key, const T * ){ - // Emplace construct an empty T object inside the pair - return create_node(allocator, std::piecewise_construct, - std::forward_as_tuple(key), std::forward_as_tuple()); - } - - static node* do_not_allocate_node(bucket_allocator_type& , const Key &, const T * ){ - __TBB_ASSERT(false,"this dummy function should not be called"); - return nullptr; - } - - node *search_bucket( const key_type &key, bucket *b ) const { - node *n = static_cast<node*>( b->node_list.load(std::memory_order_relaxed) ); - while (this->is_valid(n) && !my_hash_compare.equal(key, n->value().first)) - n = static_cast<node*>( n->next ); - __TBB_ASSERT(n != rehash_req, "Search can be executed only for rehashed bucket"); - return n; - } - - // bucket accessor is to find, rehash, acquire a lock, and access a bucket - class bucket_accessor : public bucket::scoped_type { - bucket *my_b; - public: - bucket_accessor( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { acquire( base, h, writer ); } - // find a bucket by masked hashcode, optionally rehash, and acquire the lock - inline void acquire( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { - my_b = base->get_bucket( h ); - // TODO: actually, notification is unnecessary here, just hiding double-check - if( my_b->node_list.load(std::memory_order_acquire) == rehash_req - && bucket::scoped_type::try_acquire( my_b->mutex, /*write=*/true ) ) - { - if( my_b->node_list.load(std::memory_order_relaxed) == rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing - } - else bucket::scoped_type::acquire( my_b->mutex, writer ); - __TBB_ASSERT( my_b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); - } - // check whether bucket is locked for write - bool is_writer() { return bucket::scoped_type::m_is_writer; } - // get bucket pointer - bucket *operator() () { return my_b; } - }; - - // TODO refactor to hash_base - void rehash_bucket( bucket *b_new, const hashcode_type hash ) { - __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); - __TBB_ASSERT( hash > 1, "The lowermost buckets can't be rehashed" ); - b_new->node_list.store(empty_rehashed, std::memory_order_release); // mark rehashed - hashcode_type mask = (1u << tbb::detail::log2(hash)) - 1; // get parent mask from the topmost bit - bucket_accessor b_old( this, hash & mask ); - - mask = (mask<<1) | 1; // get full mask 
for new bucket - __TBB_ASSERT( (mask&(mask+1))==0 && (hash & mask) == hash, nullptr ); - restart: - node_base* prev = nullptr; - node_base* curr = b_old()->node_list.load(std::memory_order_acquire); - while (this->is_valid(curr)) { - hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); - - if ((curr_node_hash & mask) == hash) { - if (!b_old.is_writer()) { - if (!b_old.upgrade_to_writer()) { - goto restart; // node ptr can be invalid due to concurrent erase - } - } - node_base* next = curr->next; - // exclude from b_old - if (prev == nullptr) { - b_old()->node_list.store(curr->next, std::memory_order_relaxed); - } else { - prev->next = curr->next; - } - this->add_to_bucket(b_new, curr); - curr = next; - } else { - prev = curr; - curr = curr->next; - } - } - } - -public: - - class accessor; - // Combines data access, locking, and garbage collection. - class const_accessor : private node::scoped_type /*which derived from no_copy*/ { - friend class concurrent_hash_map<Key,T,HashCompare,Allocator>; - friend class accessor; - public: - // Type of value - using value_type = const typename concurrent_hash_map::value_type; - - // True if result is empty. - bool empty() const { return !my_node; } - - // Set to null - void release() { - if( my_node ) { - node::scoped_type::release(); - my_node = 0; - } - } - - // Return reference to associated value in hash table. - const_reference operator*() const { - __TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); - return my_node->value(); - } - - // Return pointer to associated value in hash table. - const_pointer operator->() const { - return &operator*(); - } - - // Create empty result - const_accessor() : my_node(nullptr) {} - - // Destroy result after releasing the underlying reference. - ~const_accessor() { - my_node = nullptr; // scoped lock's release() is called in its destructor - } - protected: - bool is_writer() { return node::scoped_type::m_is_writer; } - node *my_node; - hashcode_type my_hash; - }; - - // Allows write access to elements and combines data access, locking, and garbage collection. - class accessor: public const_accessor { - public: - // Type of value - using value_type = typename concurrent_hash_map::value_type; - - // Return reference to associated value in hash table. - reference operator*() const { - __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" ); - return this->my_node->value(); - } - - // Return pointer to associated value in hash table. - pointer operator->() const { - return &operator*(); - } - }; - - explicit concurrent_hash_map( const hash_compare_type& compare, const allocator_type& a = allocator_type() ) - : base_type(a) - , my_hash_compare(compare) - {} - - concurrent_hash_map() : concurrent_hash_map(hash_compare_type()) {} - - explicit concurrent_hash_map( const allocator_type& a ) - : concurrent_hash_map(hash_compare_type(), a) - {} - - // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. 
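    // Editorial illustration (not from the upstream header): concurrent_hash_map<int, std::string> m(1000);
    // ends up with bucket_count() >= 1000, because the constructor below forwards n to reserve().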
- concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() ) - : concurrent_hash_map(a) - { - this->reserve(n); - } - - concurrent_hash_map( size_type n, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) - : concurrent_hash_map(compare, a) - { - this->reserve(n); - } - - // Copy constructor - concurrent_hash_map( const concurrent_hash_map &table ) - : concurrent_hash_map(node_allocator_traits::select_on_container_copy_construction(table.get_allocator())) - { - try_call( [&] { - internal_copy(table); - }).on_exception( [&] { - this->clear(); - }); - } - - concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a) - : concurrent_hash_map(a) - { - try_call( [&] { - internal_copy(table); - }).on_exception( [&] { - this->clear(); - }); - } - - // Move constructor - concurrent_hash_map( concurrent_hash_map &&table ) - : concurrent_hash_map(std::move(table.get_allocator())) - { - this->internal_move(std::move(table)); - } - - // Move constructor - concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a ) - : concurrent_hash_map(a) - { - using is_equal_type = typename node_allocator_traits::is_always_equal; - internal_move_construct_with_allocator(std::move(table), a, is_equal_type()); - } - - // Construction with copying iteration range and given allocator instance - template <typename I> - concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() ) - : concurrent_hash_map(a) - { - try_call( [&] { - internal_copy(first, last, std::distance(first, last)); - }).on_exception( [&] { - this->clear(); - }); - } - - template <typename I> - concurrent_hash_map( I first, I last, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) - : concurrent_hash_map(compare, a) - { - try_call( [&] { - internal_copy(first, last, std::distance(first, last)); - }).on_exception( [&] { - this->clear(); - }); - } - - concurrent_hash_map( std::initializer_list<value_type> il, const hash_compare_type& compare = hash_compare_type(), const allocator_type& a = allocator_type() ) - : concurrent_hash_map(compare, a) - { - try_call( [&] { - internal_copy(il.begin(), il.end(), il.size()); - }).on_exception( [&] { - this->clear(); - }); - } - - concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type& a ) - : concurrent_hash_map(il, hash_compare_type(), a) {} - - // Assignment - concurrent_hash_map& operator=( const concurrent_hash_map &table ) { - if( this != &table ) { - clear(); - copy_assign_allocators(this->my_allocator, table.my_allocator); - internal_copy(table); - } - return *this; - } - - // Move Assignment - concurrent_hash_map& operator=( concurrent_hash_map &&table ) { - if( this != &table ) { - using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; - using is_equal_type = typename node_allocator_traits::is_always_equal; - move_assign_allocators(this->my_allocator, table.my_allocator); - internal_move_assign(std::move(table), tbb::detail::disjunction<is_equal_type, pocma_type>()); - } - return *this; - } - - // Assignment - concurrent_hash_map& operator=( std::initializer_list<value_type> il ) { - clear(); - internal_copy(il.begin(), il.end(), il.size()); - return *this; - } - - // Rehashes and optionally resizes the whole table. - /** Useful to optimize performance before or after concurrent operations. - Also enables using of find() and count() concurrent methods in serial context. 
*/ - void rehash(size_type sz = 0) { - this->reserve(sz); // TODO: add reduction of number of buckets as well - hashcode_type mask = this->my_mask.load(std::memory_order_relaxed); - hashcode_type b = (mask+1)>>1; // size or first index of the last segment - __TBB_ASSERT((b&(b-1))==0, nullptr); // zero or power of 2 - bucket *bp = this->get_bucket( b ); // only the last segment should be scanned for rehashing - for(; b <= mask; b++, bp++ ) { - node_base *n = bp->node_list.load(std::memory_order_relaxed); - __TBB_ASSERT( this->is_valid(n) || n == empty_rehashed || n == rehash_req, "Broken detail structure" ); - __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); - if( n == rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one - hashcode_type h = b; bucket *b_old = bp; - do { - __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); - hashcode_type m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit - b_old = this->get_bucket( h &= m ); - } while( b_old->node_list.load(std::memory_order_relaxed) == rehash_req ); - // now h - is index of the root rehashed bucket b_old - this->mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments - node_base* prev = nullptr; - node_base* curr = b_old->node_list.load(std::memory_order_relaxed); - while (this->is_valid(curr)) { - hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); - - if ((curr_node_hash & mask) != h) { // should be rehashed - node_base* next = curr->next; - // exclude from b_old - if (prev == nullptr) { - b_old->node_list.store(curr->next, std::memory_order_relaxed); - } else { - prev->next = curr->next; - } - bucket *b_new = this->get_bucket(curr_node_hash & mask); - __TBB_ASSERT(b_new->node_list.load(std::memory_order_relaxed) != rehash_req, "hash() function changed for key in table or detail error" ); - this->add_to_bucket(b_new, curr); - curr = next; - } else { - prev = curr; - curr = curr->next; - } - } - } - } - } - - // Clear table - void clear() { - hashcode_type m = this->my_mask.load(std::memory_order_relaxed); - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - this->my_size.store(0, std::memory_order_relaxed); - segment_index_type s = this->segment_index_of( m ); - __TBB_ASSERT( s+1 == this->pointers_per_table || !this->my_table[s+1].load(std::memory_order_relaxed), "wrong mask or concurrent grow" ); - do { - __TBB_ASSERT(this->is_valid(this->my_table[s].load(std::memory_order_relaxed)), "wrong mask or concurrent grow" ); - segment_ptr_type buckets_ptr = this->my_table[s].load(std::memory_order_relaxed); - size_type sz = this->segment_size( s ? s : 1 ); - for( segment_index_type i = 0; i < sz; i++ ) - for( node_base *n = buckets_ptr[i].node_list.load(std::memory_order_relaxed); - this->is_valid(n); n = buckets_ptr[i].node_list.load(std::memory_order_relaxed) ) - { - buckets_ptr[i].node_list.store(n->next, std::memory_order_relaxed); - delete_node( n ); - } - this->delete_segment(s); - } while(s-- > 0); - this->my_mask.store(this->embedded_buckets - 1, std::memory_order_relaxed); - } - - // Clear table and destroy it. 
- ~concurrent_hash_map() { clear(); } - - //------------------------------------------------------------------------ - // Parallel algorithm support - //------------------------------------------------------------------------ - range_type range( size_type grainsize=1 ) { - return range_type( *this, grainsize ); - } - const_range_type range( size_type grainsize=1 ) const { - return const_range_type( *this, grainsize ); - } - - //------------------------------------------------------------------------ - // STL support - not thread-safe methods - //------------------------------------------------------------------------ - iterator begin() { return iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } - const_iterator begin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } - const_iterator cbegin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } - iterator end() { return iterator( *this, 0, 0, 0 ); } - const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); } - const_iterator cend() const { return const_iterator( *this, 0, 0, 0 ); } - std::pair<iterator, iterator> equal_range( const Key& key ) { return internal_equal_range( key, end() ); } - std::pair<const_iterator, const_iterator> equal_range( const Key& key ) const { return internal_equal_range( key, end() ); } - - // Number of items in table. - size_type size() const { return this->my_size.load(std::memory_order_acquire); } - - // True if size()==0. - __TBB_nodiscard bool empty() const { return size() == 0; } - - // Upper bound on size. - size_type max_size() const { - return allocator_traits_type::max_size(base_type::get_allocator()); - } - - // Returns the current number of buckets - size_type bucket_count() const { return this->my_mask.load(std::memory_order_relaxed) + 1; } - - // return allocator object - allocator_type get_allocator() const { return base_type::get_allocator(); } - - // swap two instances. Iterators are invalidated - void swap(concurrent_hash_map& table) { - using pocs_type = typename node_allocator_traits::propagate_on_container_swap; - using is_equal_type = typename node_allocator_traits::is_always_equal; - swap_allocators(this->my_allocator, table.my_allocator); - internal_swap(table, tbb::detail::disjunction<pocs_type, is_equal_type>()); - } - - //------------------------------------------------------------------------ - // concurrent map operations - //------------------------------------------------------------------------ - - // Return count of items (0 or 1) - size_type count( const Key &key ) const { - return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node ); - } - - // Find item and acquire a read lock on the item. - /** Return true if item is found, false otherwise. */ - bool find( const_accessor &result, const Key &key ) const { - result.release(); - return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, &result, /*write=*/false, &do_not_allocate_node ); - } - - // Find item and acquire a write lock on the item. - /** Return true if item is found, false otherwise. 
*/ - bool find( accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/false, key, nullptr, &result, /*write=*/true, &do_not_allocate_node ); - } - - // Insert item (if not already present) and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/true, key, nullptr, &result, /*write=*/false, &allocate_node_default_construct ); - } - - // Insert item (if not already present) and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, const Key &key ) { - result.release(); - return lookup(/*insert*/true, key, nullptr, &result, /*write=*/true, &allocate_node_default_construct ); - } - - // Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, const value_type &value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct ); - } - - // Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, const value_type &value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct ); - } - - // Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - bool insert( const value_type &value ) { - return lookup(/*insert*/true, value.first, &value.second, nullptr, /*write=*/false, &allocate_node_copy_construct ); - } - - // Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - bool insert( const_accessor &result, value_type && value ) { - return generic_move_insert(result, std::move(value)); - } - - // Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - bool insert( accessor &result, value_type && value ) { - return generic_move_insert(result, std::move(value)); - } - - // Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - bool insert( value_type && value ) { - return generic_move_insert(accessor_not_used(), std::move(value)); - } - - // Insert item by copying if there is no such key present already and acquire a read lock on the item. - /** Returns true if item is new. */ - template <typename... Args> - bool emplace( const_accessor &result, Args&&... args ) { - return generic_emplace(result, std::forward<Args>(args)...); - } - - // Insert item by copying if there is no such key present already and acquire a write lock on the item. - /** Returns true if item is new. */ - template <typename... Args> - bool emplace( accessor &result, Args&&... args ) { - return generic_emplace(result, std::forward<Args>(args)...); - } - - // Insert item by copying if there is no such key present already - /** Returns true if item is inserted. */ - template <typename... Args> - bool emplace( Args&&... 
args ) { - return generic_emplace(accessor_not_used(), std::forward<Args>(args)...); - } - - // Insert range [first, last) - template <typename I> - void insert( I first, I last ) { - for ( ; first != last; ++first ) - insert( *first ); - } - - // Insert initializer list - void insert( std::initializer_list<value_type> il ) { - insert( il.begin(), il.end() ); - } - - // Erase item. - /** Return true if item was erased by particularly this call. */ - bool erase( const Key &key ) { - node_base *erase_node; - hashcode_type const hash = my_hash_compare.hash(key); - hashcode_type mask = this->my_mask.load(std::memory_order_acquire); - restart: - {//lock scope - // get bucket - bucket_accessor b( this, hash & mask ); - search: - node_base* prev = nullptr; - erase_node = b()->node_list.load(std::memory_order_relaxed); - while (this->is_valid(erase_node) && !my_hash_compare.equal(key, static_cast<node*>(erase_node)->value().first ) ) { - prev = erase_node; - erase_node = erase_node->next; - } - - if (erase_node == nullptr) { // not found, but mask could be changed - if (this->check_mask_race(hash, mask)) - goto restart; - return false; - } else if (!b.is_writer() && !b.upgrade_to_writer()) { - if (this->check_mask_race(hash, mask)) // contended upgrade, check mask - goto restart; - goto search; - } - - // remove from container - if (prev == nullptr) { - b()->node_list.store(erase_node->next, std::memory_order_relaxed); - } else { - prev->next = erase_node->next; - } - this->my_size--; - } - { - typename node::scoped_type item_locker( erase_node->mutex, /*write=*/true ); - } - // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor! - delete_node(erase_node); // Only one thread can delete it due to write lock on the bucket - return true; - } - - // Erase item by const_accessor. - /** Return true if item was erased by particularly this call. */ - bool erase( const_accessor& item_accessor ) { - return exclude( item_accessor ); - } - - // Erase item by accessor. - /** Return true if item was erased by particularly this call. */ - bool erase( accessor& item_accessor ) { - return exclude( item_accessor ); - } - -protected: - // Insert or find item and optionally acquire a lock on the item. - bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write, node* (*allocate_node)(bucket_allocator_type&, - const Key&, const T*), node *tmp_n = 0) - { - __TBB_ASSERT( !result || !result->my_node, nullptr ); - bool return_value; - hashcode_type const h = my_hash_compare.hash( key ); - hashcode_type m = this->my_mask.load(std::memory_order_acquire); - segment_index_type grow_segment = 0; - node *n; - restart: - {//lock scope - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - return_value = false; - // get bucket - bucket_accessor b( this, h & m ); - // find a node - n = search_bucket( key, b() ); - if( op_insert ) { - // [opt] insert a key - if( !n ) { - if( !tmp_n ) { - tmp_n = allocate_node(base_type::get_allocator(), key, t); - } - if( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion - // Rerun search_list, in case another thread inserted the item during the upgrade. - n = search_bucket( key, b() ); - if( this->is_valid(n) ) { // unfortunately, it did - b.downgrade_to_reader(); - goto exists; - } - } - if( this->check_mask_race(h, m) ) - goto restart; // b.release() is done in ~b(). 
- // insert and set flag to grow the container - grow_segment = this->insert_new_node( b(), n = tmp_n, m ); - tmp_n = 0; - return_value = true; - } - } else { // find or count - if( !n ) { - if( this->check_mask_race( h, m ) ) - goto restart; // b.release() is done in ~b(). TODO: replace by continue - return false; - } - return_value = true; - } - exists: - if( !result ) goto check_growth; - // TODO: the following seems as generic/regular operation - // acquire the item - if( !result->try_acquire( n->mutex, write ) ) { - for( tbb::detail::atomic_backoff backoff(true);; ) { - if( result->try_acquire( n->mutex, write ) ) break; - if( !backoff.bounded_pause() ) { - // the wait takes really long, restart the operation - b.release(); - __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" ); - yield(); - m = this->my_mask.load(std::memory_order_acquire); - goto restart; - } - } - } - }//lock scope - result->my_node = n; - result->my_hash = h; - check_growth: - // [opt] grow the container - if( grow_segment ) { - this->enable_segment( grow_segment ); - } - if( tmp_n ) // if op_insert only - delete_node( tmp_n ); - return return_value; - } - - struct accessor_not_used { void release(){}}; - friend const_accessor* accessor_location( accessor_not_used const& ){ return nullptr;} - friend const_accessor* accessor_location( const_accessor & a ) { return &a;} - - friend bool is_write_access_needed( accessor const& ) { return true;} - friend bool is_write_access_needed( const_accessor const& ) { return false;} - friend bool is_write_access_needed( accessor_not_used const& ) { return false;} - - template <typename Accessor> - bool generic_move_insert( Accessor && result, value_type && value ) { - result.release(); - return lookup(/*insert*/true, value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct ); - } - - template <typename Accessor, typename... Args> - bool generic_emplace( Accessor && result, Args &&... 
args ) { - result.release(); - node * node_ptr = create_node(base_type::get_allocator(), std::forward<Args>(args)...); - return lookup(/*insert*/true, node_ptr->value().first, nullptr, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr ); - } - - // delete item by accessor - bool exclude( const_accessor &item_accessor ) { - __TBB_ASSERT( item_accessor.my_node, nullptr ); - node_base *const exclude_node = item_accessor.my_node; - hashcode_type const hash = item_accessor.my_hash; - hashcode_type mask = this->my_mask.load(std::memory_order_acquire); - do { - // get bucket - bucket_accessor b( this, hash & mask, /*writer=*/true ); - node_base* prev = nullptr; - node_base* curr = b()->node_list.load(std::memory_order_relaxed); - - while (curr && curr != exclude_node) { - prev = curr; - curr = curr->next; - } - - if (curr == nullptr) { // someone else was first - if (this->check_mask_race(hash, mask)) - continue; - item_accessor.release(); - return false; - } - __TBB_ASSERT( curr == exclude_node, nullptr ); - // remove from container - if (prev == nullptr) { - b()->node_list.store(curr->next, std::memory_order_relaxed); - } else { - prev->next = curr->next; - } - - this->my_size--; - break; - } while(true); - if (!item_accessor.is_writer()) { // need to get exclusive lock - item_accessor.upgrade_to_writer(); // return value means nothing here - } - - item_accessor.release(); - delete_node(exclude_node); // Only one thread can delete it - return true; - } - - // Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) - template <typename I> - std::pair<I, I> internal_equal_range( const Key& key, I end_ ) const { - hashcode_type h = my_hash_compare.hash( key ); - hashcode_type m = this->my_mask.load(std::memory_order_relaxed); - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - h &= m; - bucket *b = this->get_bucket( h ); - while ( b->node_list.load(std::memory_order_relaxed) == rehash_req ) { - m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit - b = this->get_bucket( h &= m ); - } - node *n = search_bucket( key, b ); - if( !n ) - return std::make_pair(end_, end_); - iterator lower(*this, h, b, n), upper(lower); - return std::make_pair(lower, ++upper); - } - - // Copy "source" to *this, where *this must start out empty. - void internal_copy( const concurrent_hash_map& source ) { - hashcode_type mask = source.my_mask.load(std::memory_order_relaxed); - if( this->my_mask.load(std::memory_order_relaxed) == mask ) { // optimized version - this->reserve(source.my_size.load(std::memory_order_relaxed)); // TODO: load_factor? 
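            // Editorial note (not from the upstream header) on the loop below: k & (k-2) is zero
            // exactly at segment base indices (0, 2, 4, 8, 16, ...), so the dst/src bucket
            // pointers are re-read from the segment tables only at segment boundaries and are
            // advanced contiguously in between.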
- bucket *dst = 0, *src = 0; - bool rehash_required = false; - for( hashcode_type k = 0; k <= mask; k++ ) { - if( k & (k-2) ) ++dst,src++; // not the beginning of a segment - else { dst = this->get_bucket( k ); src = source.get_bucket( k ); } - __TBB_ASSERT( dst->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); - node *n = static_cast<node*>( src->node_list.load(std::memory_order_relaxed) ); - if( n == rehash_req ) { // source is not rehashed, items are in previous buckets - rehash_required = true; - dst->node_list.store(rehash_req, std::memory_order_relaxed); - } else for(; n; n = static_cast<node*>( n->next ) ) { - node* node_ptr = create_node(base_type::get_allocator(), n->value().first, n->value().second); - this->add_to_bucket( dst, node_ptr); - this->my_size.fetch_add(1, std::memory_order_relaxed); - } - } - if( rehash_required ) rehash(); - } else internal_copy(source.begin(), source.end(), source.my_size.load(std::memory_order_relaxed)); - } - - template <typename I> - void internal_copy( I first, I last, size_type reserve_size ) { - this->reserve(reserve_size); // TODO: load_factor? - hashcode_type m = this->my_mask.load(std::memory_order_relaxed); - for(; first != last; ++first) { - hashcode_type h = my_hash_compare.hash( (*first).first ); - bucket *b = this->get_bucket( h & m ); - __TBB_ASSERT( b->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); - node* node_ptr = create_node(base_type::get_allocator(), (*first).first, (*first).second); - this->add_to_bucket( b, node_ptr ); - ++this->my_size; // TODO: replace by non-atomic op - } - } - - void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type&, - /*is_always_equal=*/std::true_type ) - { - this->internal_move(std::move(other)); - } - - void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type& a, - /*is_always_equal=*/std::false_type ) - { - if (a == other.get_allocator()){ - this->internal_move(std::move(other)); - } else { - try_call( [&] { - internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), - other.size()); - }).on_exception( [&] { - this->clear(); - }); - } - } - - void internal_move_assign( concurrent_hash_map&& other, - /*is_always_equal || POCMA = */std::true_type) - { - this->internal_move(std::move(other)); - } - - void internal_move_assign(concurrent_hash_map&& other, /*is_always_equal=*/ std::false_type) { - if (this->my_allocator == other.my_allocator) { - this->internal_move(std::move(other)); - } else { - //do per element move - internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), - other.size()); - } - } - - void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::true_type) { - this->internal_swap_content(other); - } - - void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::false_type) { - __TBB_ASSERT(this->my_allocator == other.my_allocator, nullptr); - this->internal_swap_content(other); - } - - // Fast find when no concurrent erasure is used. For internal use inside TBB only! - /** Return pointer to item with given key, or nullptr if no such item exists. - Must not be called concurrently with erasure operations. 
*/ - const_pointer internal_fast_find( const Key& key ) const { - hashcode_type h = my_hash_compare.hash( key ); - hashcode_type m = this->my_mask.load(std::memory_order_acquire); - node *n; - restart: - __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); - bucket *b = this->get_bucket( h & m ); - // TODO: actually, notification is unnecessary here, just hiding double-check - if( b->node_list.load(std::memory_order_acquire) == rehash_req ) - { - typename bucket::scoped_type lock; - if( lock.try_acquire( b->mutex, /*write=*/true ) ) { - if( b->node_list.load(std::memory_order_relaxed) == rehash_req) - const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing - } - else lock.acquire( b->mutex, /*write=*/false ); - __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req,nullptr); - } - n = search_bucket( key, b ); - if( n ) - return n->storage(); - else if( this->check_mask_race( h, m ) ) - goto restart; - return 0; - } -}; - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename It, - typename HashCompare = tbb_hash_compare<iterator_key_t<It>>, - typename Alloc = tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<HashCompare>>> -concurrent_hash_map( It, It, HashCompare = HashCompare(), Alloc = Alloc() ) --> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, HashCompare, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_hash_map( It, It, Alloc ) --> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, tbb_hash_compare<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, - typename HashCompare = tbb_hash_compare<std::remove_const_t<Key>>, - typename Alloc = tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<HashCompare>>> -concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, HashCompare = HashCompare(), Alloc = Alloc() ) --> concurrent_hash_map<std::remove_const_t<Key>, T, HashCompare, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_hash_map<std::remove_const_t<Key>, T, tbb_hash_compare<std::remove_const_t<Key>>, Alloc>; - -#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ - -template <typename Key, typename T, typename HashCompare, typename A1, typename A2> -inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) { - if(a.size() != b.size()) return false; - typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end()); - typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end()); - for(; i != i_end; ++i) { - j = b.equal_range(i->first).first; - if( j == j_end || !(i->second == j->second) ) return false; - } - return true; -} - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template <typename Key, typename T, typename HashCompare, typename A1, typename A2> -inline bool operator!=(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) -{ return !(a == b); } -#endif // !__TBB_CPP20_COMPARISONS_PRESENT 
- -template <typename Key, typename T, typename HashCompare, typename A> -inline void swap(concurrent_hash_map<Key, T, HashCompare, A> &a, concurrent_hash_map<Key, T, HashCompare, A> &b) -{ a.swap( b ); } - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::split; - using detail::d1::concurrent_hash_map; - using detail::d1::tbb_hash_compare; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_concurrent_hash_map_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_hash_map_H +#define __TBB_concurrent_hash_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_assert.h" +#include "detail/_allocator_traits.h" +#include "detail/_containers_helpers.h" +#include "detail/_template_helpers.h" +#include "detail/_hash_compare.h" +#include "detail/_range_common.h" +#include "tbb_allocator.h" +#include "spin_rw_mutex.h" + +#include <atomic> +#include <initializer_list> +#include <tuple> +#include <iterator> +#include <utility> // Need std::pair +#include <cstring> // Need std::memset + +namespace tbb { +namespace detail { +namespace d1 { + +struct hash_map_node_base : no_copy { + using mutex_type = spin_rw_mutex; + // Scoped lock type for mutex + using scoped_type = mutex_type::scoped_lock; + // Next node in chain + hash_map_node_base* next; + mutex_type mutex; +}; + +// Incompleteness flag value +static hash_map_node_base* const rehash_req = reinterpret_cast<hash_map_node_base*>(std::size_t(3)); +// Rehashed empty bucket flag +static hash_map_node_base* const empty_rehashed = reinterpret_cast<hash_map_node_base*>(std::size_t(0)); + +// base class of concurrent_hash_map + +template <typename Allocator> +class hash_map_base { +public: + using size_type = std::size_t; + using hashcode_type = std::size_t; + using segment_index_type = std::size_t; + using node_base = hash_map_node_base; + + struct bucket : no_copy { + using mutex_type = spin_rw_mutex; + using scoped_type = mutex_type::scoped_lock; + + bucket() : node_list(nullptr) {} + bucket( node_base* ptr ) : node_list(ptr) {} + + mutex_type mutex; + std::atomic<node_base*> node_list; + }; + + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using bucket_allocator_type = typename allocator_traits_type::template rebind_alloc<bucket>; + using bucket_allocator_traits = tbb::detail::allocator_traits<bucket_allocator_type>; + + // Count of segments in the first block + static constexpr size_type embedded_block = 1; + // Count of segments in the first block + static constexpr size_type embedded_buckets = 1 << embedded_block; + // Count of segments in the first block + static constexpr size_type first_block = 8; //including embedded_block. 
perfect with bucket size 16, so the allocations are power of 4096 + // Size of a pointer / table size + static constexpr size_type pointers_per_table = sizeof(segment_index_type) * 8; // one segment per bit + + using segment_ptr_type = bucket*; + using atomic_segment_type = std::atomic<segment_ptr_type>; + using segments_table_type = atomic_segment_type[pointers_per_table]; + + hash_map_base( const allocator_type& alloc ) : my_allocator(alloc), my_mask(embedded_buckets - 1), my_size(0) { + for (size_type i = 0; i != embedded_buckets; ++i) { + my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); + } + + for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) { + auto argument = segment_index < embedded_block ? my_embedded_segment + segment_base(segment_index) : nullptr; + my_table[segment_index].store(argument, std::memory_order_relaxed); + } + + __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); + } + + // segment index of given index in the array + static segment_index_type segment_index_of( size_type index ) { + return segment_index_type(tbb::detail::log2( index|1 )); + } + + // the first array index of given segment + static segment_index_type segment_base( segment_index_type k ) { + return (segment_index_type(1) << k & ~segment_index_type(1)); + } + + // segment size except for k == 0 + static size_type segment_size( segment_index_type k ) { + return size_type(1) << k; // fake value for k==0 + } + + // true if ptr is valid pointer + static bool is_valid( void* ptr ) { + return reinterpret_cast<uintptr_t>(ptr) > uintptr_t(63); + } + + template <typename... Args> + void init_buckets_impl( segment_ptr_type ptr, size_type sz, Args&&... args ) { + for (size_type i = 0; i < sz; ++i) { + bucket_allocator_traits::construct(my_allocator, ptr + i, std::forward<Args>(args)...); + } + } + + // Initialize buckets + void init_buckets( segment_ptr_type ptr, size_type sz, bool is_initial ) { + if (is_initial) { + init_buckets_impl(ptr, sz); + } else { + init_buckets_impl(ptr, sz, reinterpret_cast<node_base*>(rehash_req)); + } + } + + // Add node n to bucket b + static void add_to_bucket( bucket* b, node_base* n ) { + __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); + n->next = b->node_list.load(std::memory_order_relaxed); + b->node_list.store(n, std::memory_order_relaxed); // its under lock and flag is set + } + + const bucket_allocator_type& get_allocator() const { + return my_allocator; + } + + bucket_allocator_type& get_allocator() { + return my_allocator; + } + + // Enable segment + void enable_segment( segment_index_type k, bool is_initial = false ) { + __TBB_ASSERT( k, "Zero segment must be embedded" ); + size_type sz; + __TBB_ASSERT( !is_valid(my_table[k].load(std::memory_order_relaxed)), "Wrong concurrent assignment"); + if (k >= first_block) { + sz = segment_size(k); + segment_ptr_type ptr = nullptr; + try_call( [&] { + ptr = bucket_allocator_traits::allocate(my_allocator, sz); + } ).on_exception( [&] { + my_table[k].store(nullptr, std::memory_order_relaxed); + }); + + __TBB_ASSERT(ptr, nullptr); + init_buckets(ptr, sz, is_initial); + my_table[k].store(ptr, std::memory_order_release); + sz <<= 1;// double it to get entire capacity of the container + } else { // the first block + __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); + sz = segment_size(first_block); + segment_ptr_type ptr = nullptr; + try_call( [&] { + ptr = 
bucket_allocator_traits::allocate(my_allocator, sz - embedded_buckets); + } ).on_exception( [&] { + my_table[k].store(nullptr, std::memory_order_relaxed); + }); + + __TBB_ASSERT(ptr, nullptr); + init_buckets(ptr, sz - embedded_buckets, is_initial); + ptr -= segment_base(embedded_block); + for(segment_index_type i = embedded_block; i < first_block; i++) // calc the offsets + my_table[i].store(ptr + segment_base(i), std::memory_order_release); + } + my_mask.store(sz-1, std::memory_order_release); + } + + void delete_segment( segment_index_type s ) { + segment_ptr_type buckets_ptr = my_table[s].load(std::memory_order_relaxed); + size_type sz = segment_size( s ? s : 1 ); + + size_type deallocate_size = 0; + + if (s >= first_block) { // the first segment or the next + deallocate_size = sz; + } else if (s == embedded_block && embedded_block != first_block) { + deallocate_size = segment_size(first_block) - embedded_buckets; + } + + for (size_type i = 0; i < deallocate_size; ++i) { + bucket_allocator_traits::destroy(my_allocator, buckets_ptr + i); + } + if (deallocate_size != 0) { + bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, deallocate_size); + } + + if (s >= embedded_block) my_table[s].store(nullptr, std::memory_order_relaxed); + } + + // Get bucket by (masked) hashcode + bucket *get_bucket( hashcode_type h ) const noexcept { + segment_index_type s = segment_index_of( h ); + h -= segment_base(s); + segment_ptr_type seg = my_table[s].load(std::memory_order_acquire); + __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); + return &seg[h]; + } + + // detail serial rehashing helper + void mark_rehashed_levels( hashcode_type h ) noexcept { + segment_index_type s = segment_index_of( h ); + while (segment_ptr_type seg = my_table[++s].load(std::memory_order_relaxed)) + if( seg[h].node_list.load(std::memory_order_relaxed) == rehash_req ) { + seg[h].node_list.store(empty_rehashed, std::memory_order_relaxed); + mark_rehashed_levels( h + ((hashcode_type)1<<s) ); // optimized segment_base(s) + } + } + + // Check for mask race + // Splitting into two functions should help inlining + inline bool check_mask_race( const hashcode_type h, hashcode_type &m ) const { + hashcode_type m_now, m_old = m; + m_now = my_mask.load(std::memory_order_acquire); + if (m_old != m_now) { + return check_rehashing_collision(h, m_old, m = m_now); + } + return false; + } + + // Process mask race, check for rehashing collision + bool check_rehashing_collision( const hashcode_type h, hashcode_type m_old, hashcode_type m ) const { + __TBB_ASSERT(m_old != m, nullptr); // TODO?: m arg could be optimized out by passing h = h&m + if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event + // condition above proves that 'h' has some other bits set beside 'm_old' + // find next applicable mask after m_old //TODO: look at bsl instruction + for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size + ; + m_old = (m_old<<1) - 1; // get full mask from a bit + __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, nullptr); + // check whether it is rehashing/ed + if( get_bucket(h & m_old)->node_list.load(std::memory_order_acquire) != rehash_req ) { + return true; + } + } + return false; + } + + // Insert a node and check for load factor. @return segment index to enable. 
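// Editorial note (illustrative, not part of the diff; inferred from enable_segment above and
// insert_new_node below): the table grows when the item count reaches the current mask, i.e. the
// load factor is kept at roughly one item per bucket. The segment chosen for growth is
// log2(mask + 1), so once the first_block segments are in place each growth step doubles the
// bucket count (with the defaults above the mask progresses 1 -> 255 -> 511 -> 1023 -> ...).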
+ segment_index_type insert_new_node( bucket *b, node_base *n, hashcode_type mask ) { + size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted + add_to_bucket( b, n ); + // check load factor + if( sz >= mask ) { // TODO: add custom load_factor + segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of + __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated"); + static const segment_ptr_type is_allocating = segment_ptr_type(2);; + segment_ptr_type disabled = nullptr; + if (!(my_table[new_seg].load(std::memory_order_acquire)) + && my_table[new_seg].compare_exchange_strong(disabled, is_allocating)) + return new_seg; // The value must be processed + } + return 0; + } + + // Prepare enough segments for number of buckets + void reserve(size_type buckets) { + if( !buckets-- ) return; + bool is_initial = !my_size.load(std::memory_order_relaxed); + for (size_type m = my_mask.load(std::memory_order_relaxed); buckets > m; + m = my_mask.load(std::memory_order_relaxed)) + { + enable_segment( segment_index_of( m+1 ), is_initial ); + } + } + + // Swap hash_map_bases + void internal_swap_content(hash_map_base &table) { + using std::swap; + swap_atomics_relaxed(my_mask, table.my_mask); + swap_atomics_relaxed(my_size, table.my_size); + + for(size_type i = 0; i < embedded_buckets; i++) { + auto temp = my_embedded_segment[i].node_list.load(std::memory_order_relaxed); + my_embedded_segment[i].node_list.store(table.my_embedded_segment[i].node_list.load(std::memory_order_relaxed), + std::memory_order_relaxed); + table.my_embedded_segment[i].node_list.store(temp, std::memory_order_relaxed); + } + for(size_type i = embedded_block; i < pointers_per_table; i++) { + auto temp = my_table[i].load(std::memory_order_relaxed); + my_table[i].store(table.my_table[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + table.my_table[i].store(temp, std::memory_order_relaxed); + } + } + + void internal_move(hash_map_base&& other) { + my_mask.store(other.my_mask.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_mask.store(embedded_buckets - 1, std::memory_order_relaxed); + + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(0, std::memory_order_relaxed); + + for (size_type i = 0; i < embedded_buckets; ++i) { + my_embedded_segment[i].node_list.store(other.my_embedded_segment[i].node_list, std::memory_order_relaxed); + other.my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); + } + + for (size_type i = embedded_block; i < pointers_per_table; ++i) { + my_table[i].store(other.my_table[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + other.my_table[i].store(nullptr, std::memory_order_relaxed); + } + } + +protected: + + bucket_allocator_type my_allocator; + // Hash mask = sum of allocated segment sizes - 1 + std::atomic<hashcode_type> my_mask; + // Size of container in stored items + std::atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects + // Zero segment + bucket my_embedded_segment[embedded_buckets]; + // Segment pointers table. 
Also prevents false sharing between my_mask and my_size + segments_table_type my_table; +}; + +template <typename Iterator> +class hash_map_range; + +// Meets requirements of a forward iterator for STL +// Value is either the T or const T type of the container. +template <typename Container, typename Value> +class hash_map_iterator { + using map_type = Container; + using node = typename Container::node; + using map_base = typename Container::base_type; + using node_base = typename map_base::node_base; + using bucket = typename map_base::bucket; +public: + using value_type = Value; + using size_type = typename Container::size_type; + using difference_type = typename Container::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::forward_iterator_tag; + + // Construct undefined iterator + hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {} + hash_map_iterator( const hash_map_iterator<Container, typename Container::value_type>& other ) : + my_map(other.my_map), + my_index(other.my_index), + my_bucket(other.my_bucket), + my_node(other.my_node) + {} + + hash_map_iterator& operator=( const hash_map_iterator<Container, typename Container::value_type>& other ) { + my_map = other.my_map; + my_index = other.my_index; + my_bucket = other.my_bucket; + my_node = other.my_node; + return *this; + } + + Value& operator*() const { + __TBB_ASSERT( map_base::is_valid(my_node), "iterator uninitialized or at end of container?" ); + return my_node->value(); + } + + Value* operator->() const {return &operator*();} + + hash_map_iterator& operator++() { + my_node = static_cast<node*>( my_node->next ); + if( !my_node ) advance_to_next_bucket(); + return *this; + } + + // Post increment + hash_map_iterator operator++(int) { + hash_map_iterator old(*this); + operator++(); + return old; + } +private: + template <typename C, typename T, typename U> + friend bool operator==( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename T, typename U> + friend bool operator!=( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename T, typename U> + friend ptrdiff_t operator-( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename U> + friend class hash_map_iterator; + + template <typename I> + friend class hash_map_range; + + void advance_to_next_bucket() { // TODO?: refactor to iterator_base class + size_t k = my_index+1; + __TBB_ASSERT( my_bucket, "advancing an invalid iterator?"); + while (k <= my_map->my_mask.load(std::memory_order_relaxed)) { + // Following test uses 2's-complement wizardry + if( k&(k-2) ) // not the beginning of a segment + ++my_bucket; + else my_bucket = my_map->get_bucket( k ); + my_node = static_cast<node*>( my_bucket->node_list.load(std::memory_order_relaxed) ); + if( map_base::is_valid(my_node) ) { + my_index = k; return; + } + ++k; + } + my_bucket = 0; my_node = 0; my_index = k; // the end + } + + template <typename Key, typename T, typename HashCompare, typename A> + friend class concurrent_hash_map; + + hash_map_iterator( const Container &map, std::size_t index, const bucket *b, node_base *n ) : + my_map(&map), my_index(index), my_bucket(b), my_node(static_cast<node*>(n)) + { + if( b && !map_base::is_valid(n) ) + advance_to_next_bucket(); + } + + // concurrent_hash_map over which we are iterating. 
+ const Container *my_map; + // Index in hash table for current item + size_t my_index; + // Pointer to bucket + const bucket* my_bucket; + // Pointer to node that has current item + node* my_node; +}; + +template <typename Container, typename T, typename U> +bool operator==( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { + return i.my_node == j.my_node && i.my_map == j.my_map; +} + +template <typename Container, typename T, typename U> +bool operator!=( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { + return i.my_node != j.my_node || i.my_map != j.my_map; +} + +// Range class used with concurrent_hash_map +template <typename Iterator> +class hash_map_range { + using map_type = typename Iterator::map_type; +public: + // Type for size of a range + using size_type = std::size_t; + using value_type = typename Iterator::value_type; + using reference = typename Iterator::reference; + using difference_type = typename Iterator::difference_type; + using iterator = Iterator; + + // True if range is empty. + bool empty() const {return my_begin == my_end;} + + // True if range can be partitioned into two subranges. + bool is_divisible() const { + return my_midpoint != my_end; + } + + // Split range. + hash_map_range( hash_map_range& r, split ) : + my_end(r.my_end), + my_grainsize(r.my_grainsize) + { + r.my_end = my_begin = r.my_midpoint; + __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); + __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); + set_midpoint(); + r.set_midpoint(); + } + + // Init range with container and grainsize specified + hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : + my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list.load(std::memory_order_relaxed) ) ), + my_end( Iterator( map, map.my_mask.load(std::memory_order_relaxed) + 1, 0, 0 ) ), + my_grainsize( grainsize_ ) + { + __TBB_ASSERT( grainsize_>0, "grainsize must be positive" ); + set_midpoint(); + } + + const Iterator begin() const { return my_begin; } + const Iterator end() const { return my_end; } + // The grain size for this range. + size_type grainsize() const { return my_grainsize; } + +private: + Iterator my_begin; + Iterator my_end; + mutable Iterator my_midpoint; + size_t my_grainsize; + // Set my_midpoint to point approximately half way between my_begin and my_end. 
+ void set_midpoint() const; + template <typename U> friend class hash_map_range; +}; + +template <typename Iterator> +void hash_map_range<Iterator>::set_midpoint() const { + // Split by groups of nodes + size_t m = my_end.my_index-my_begin.my_index; + if( m > my_grainsize ) { + m = my_begin.my_index + m/2u; + auto b = my_begin.my_map->get_bucket(m); + my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list.load(std::memory_order_relaxed)); + } else { + my_midpoint = my_end; + } + __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index, + "my_begin is after my_midpoint" ); + __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index, + "my_midpoint is after my_end" ); + __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end, + "[my_begin, my_midpoint) range should not be empty" ); +} + +template <typename Key, typename T, + typename HashCompare = tbb_hash_compare<Key>, + typename Allocator = tbb_allocator<std::pair<const Key, T>>> +class concurrent_hash_map : protected hash_map_base<Allocator> { + template <typename Container, typename Value> + friend class hash_map_iterator; + + template <typename I> + friend class hash_map_range; + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; +public: + using base_type = hash_map_base<Allocator>; + using key_type = Key; + using mapped_type = T; + // type_identity is needed to disable implicit deduction guides for std::initializer_list constructors + // and copy/move constructor with explicit allocator argument + using allocator_type = tbb::detail::type_identity_t<Allocator>; + using hash_compare_type = tbb::detail::type_identity_t<HashCompare>; + using value_type = std::pair<const Key, T>; + using size_type = typename base_type::size_type; + using difference_type = std::ptrdiff_t; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using reference = value_type&; + using const_reference = const value_type&; + using iterator = hash_map_iterator<concurrent_hash_map, value_type>; + using const_iterator = hash_map_iterator<concurrent_hash_map, const value_type>; + using range_type = hash_map_range<iterator>; + using const_range_type = hash_map_range<const_iterator>; + +protected: + static_assert(std::is_same<value_type, typename Allocator::value_type>::value, + "value_type of the container must be the same as its allocator's"); + + friend class const_accessor; + class node; + using segment_index_type = typename base_type::segment_index_type; + using segment_ptr_type = typename base_type::segment_ptr_type; + using node_base = typename base_type::node_base; + using bucket = typename base_type::bucket; + using hashcode_type = typename base_type::hashcode_type; + using bucket_allocator_type = typename base_type::bucket_allocator_type; + using node_allocator_type = typename base_type::allocator_traits_type::template rebind_alloc<node>; + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + hash_compare_type my_hash_compare; + + class node : public node_base { + public: + node() {} + ~node() {} + pointer storage() { return &my_value; } + value_type& value() { return *storage(); } + private: + union { + value_type my_value; + }; + }; + + void delete_node( node_base *n ) { + node_allocator_type node_allocator(this->get_allocator()); + node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)->storage()); + node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)); + 
node_allocator_traits::deallocate(node_allocator, static_cast<node*>(n), 1); + } + + template <typename... Args> + static node* create_node(bucket_allocator_type& allocator, Args&&... args) { + node_allocator_type node_allocator(allocator); + node* node_ptr = node_allocator_traits::allocate(node_allocator, 1); + auto guard = make_raii_guard([&] { + node_allocator_traits::destroy(node_allocator, node_ptr); + node_allocator_traits::deallocate(node_allocator, node_ptr, 1); + }); + + node_allocator_traits::construct(node_allocator, node_ptr); + node_allocator_traits::construct(node_allocator, node_ptr->storage(), std::forward<Args>(args)...); + guard.dismiss(); + return node_ptr; + } + + static node* allocate_node_copy_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ + return create_node(allocator, key, *t); + } + + static node* allocate_node_move_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ + return create_node(allocator, key, std::move(*const_cast<T*>(t))); + } + + static node* allocate_node_default_construct(bucket_allocator_type& allocator, const Key &key, const T * ){ + // Emplace construct an empty T object inside the pair + return create_node(allocator, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + } + + static node* do_not_allocate_node(bucket_allocator_type& , const Key &, const T * ){ + __TBB_ASSERT(false,"this dummy function should not be called"); + return nullptr; + } + + node *search_bucket( const key_type &key, bucket *b ) const { + node *n = static_cast<node*>( b->node_list.load(std::memory_order_relaxed) ); + while (this->is_valid(n) && !my_hash_compare.equal(key, n->value().first)) + n = static_cast<node*>( n->next ); + __TBB_ASSERT(n != rehash_req, "Search can be executed only for rehashed bucket"); + return n; + } + + // bucket accessor is to find, rehash, acquire a lock, and access a bucket + class bucket_accessor : public bucket::scoped_type { + bucket *my_b; + public: + bucket_accessor( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { acquire( base, h, writer ); } + // find a bucket by masked hashcode, optionally rehash, and acquire the lock + inline void acquire( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { + my_b = base->get_bucket( h ); + // TODO: actually, notification is unnecessary here, just hiding double-check + if( my_b->node_list.load(std::memory_order_acquire) == rehash_req + && bucket::scoped_type::try_acquire( my_b->mutex, /*write=*/true ) ) + { + if( my_b->node_list.load(std::memory_order_relaxed) == rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing + } + else bucket::scoped_type::acquire( my_b->mutex, writer ); + __TBB_ASSERT( my_b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); + } + // check whether bucket is locked for write + bool is_writer() { return bucket::scoped_type::m_is_writer; } + // get bucket pointer + bucket *operator() () { return my_b; } + }; + + // TODO refactor to hash_base + void rehash_bucket( bucket *b_new, const hashcode_type hash ) { + __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); + __TBB_ASSERT( hash > 1, "The lowermost buckets can't be rehashed" ); + b_new->node_list.store(empty_rehashed, std::memory_order_release); // mark rehashed + hashcode_type mask = (1u << tbb::detail::log2(hash)) - 1; // get parent mask from the topmost bit + bucket_accessor b_old( this, hash & mask ); + + mask = (mask<<1) | 1; // get full mask 
for new bucket + __TBB_ASSERT( (mask&(mask+1))==0 && (hash & mask) == hash, nullptr ); + restart: + node_base* prev = nullptr; + node_base* curr = b_old()->node_list.load(std::memory_order_acquire); + while (this->is_valid(curr)) { + hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); + + if ((curr_node_hash & mask) == hash) { + if (!b_old.is_writer()) { + if (!b_old.upgrade_to_writer()) { + goto restart; // node ptr can be invalid due to concurrent erase + } + } + node_base* next = curr->next; + // exclude from b_old + if (prev == nullptr) { + b_old()->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + this->add_to_bucket(b_new, curr); + curr = next; + } else { + prev = curr; + curr = curr->next; + } + } + } + +public: + + class accessor; + // Combines data access, locking, and garbage collection. + class const_accessor : private node::scoped_type /*which derived from no_copy*/ { + friend class concurrent_hash_map<Key,T,HashCompare,Allocator>; + friend class accessor; + public: + // Type of value + using value_type = const typename concurrent_hash_map::value_type; + + // True if result is empty. + bool empty() const { return !my_node; } + + // Set to null + void release() { + if( my_node ) { + node::scoped_type::release(); + my_node = 0; + } + } + + // Return reference to associated value in hash table. + const_reference operator*() const { + __TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); + return my_node->value(); + } + + // Return pointer to associated value in hash table. + const_pointer operator->() const { + return &operator*(); + } + + // Create empty result + const_accessor() : my_node(nullptr) {} + + // Destroy result after releasing the underlying reference. + ~const_accessor() { + my_node = nullptr; // scoped lock's release() is called in its destructor + } + protected: + bool is_writer() { return node::scoped_type::m_is_writer; } + node *my_node; + hashcode_type my_hash; + }; + + // Allows write access to elements and combines data access, locking, and garbage collection. + class accessor: public const_accessor { + public: + // Type of value + using value_type = typename concurrent_hash_map::value_type; + + // Return reference to associated value in hash table. + reference operator*() const { + __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" ); + return this->my_node->value(); + } + + // Return pointer to associated value in hash table. + pointer operator->() const { + return &operator*(); + } + }; + + explicit concurrent_hash_map( const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : base_type(a) + , my_hash_compare(compare) + {} + + concurrent_hash_map() : concurrent_hash_map(hash_compare_type()) {} + + explicit concurrent_hash_map( const allocator_type& a ) + : concurrent_hash_map(hash_compare_type(), a) + {} + + // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. 
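// Editorial note (illustrative usage, not part of the diff): a hypothetical
//   tbb::concurrent_hash_map<int, std::string> table(1000);
// requests capacity for at least 1000 buckets up front; reserve() rounds the count up to the
// container's internal segment granularity.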
+ concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + this->reserve(n); + } + + concurrent_hash_map( size_type n, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + this->reserve(n); + } + + // Copy constructor + concurrent_hash_map( const concurrent_hash_map &table ) + : concurrent_hash_map(node_allocator_traits::select_on_container_copy_construction(table.get_allocator())) + { + try_call( [&] { + internal_copy(table); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(table); + }).on_exception( [&] { + this->clear(); + }); + } + + // Move constructor + concurrent_hash_map( concurrent_hash_map &&table ) + : concurrent_hash_map(std::move(table.get_allocator())) + { + this->internal_move(std::move(table)); + } + + // Move constructor + concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a ) + : concurrent_hash_map(a) + { + using is_equal_type = typename node_allocator_traits::is_always_equal; + internal_move_construct_with_allocator(std::move(table), a, is_equal_type()); + } + + // Construction with copying iteration range and given allocator instance + template <typename I> + concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(first, last, std::distance(first, last)); + }).on_exception( [&] { + this->clear(); + }); + } + + template <typename I> + concurrent_hash_map( I first, I last, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + try_call( [&] { + internal_copy(first, last, std::distance(first, last)); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( std::initializer_list<value_type> il, const hash_compare_type& compare = hash_compare_type(), const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + try_call( [&] { + internal_copy(il.begin(), il.end(), il.size()); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type& a ) + : concurrent_hash_map(il, hash_compare_type(), a) {} + + // Assignment + concurrent_hash_map& operator=( const concurrent_hash_map &table ) { + if( this != &table ) { + clear(); + copy_assign_allocators(this->my_allocator, table.my_allocator); + internal_copy(table); + } + return *this; + } + + // Move Assignment + concurrent_hash_map& operator=( concurrent_hash_map &&table ) { + if( this != &table ) { + using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; + using is_equal_type = typename node_allocator_traits::is_always_equal; + move_assign_allocators(this->my_allocator, table.my_allocator); + internal_move_assign(std::move(table), tbb::detail::disjunction<is_equal_type, pocma_type>()); + } + return *this; + } + + // Assignment + concurrent_hash_map& operator=( std::initializer_list<value_type> il ) { + clear(); + internal_copy(il.begin(), il.end(), il.size()); + return *this; + } + + // Rehashes and optionally resizes the whole table. + /** Useful to optimize performance before or after concurrent operations. + Also enables using of find() and count() concurrent methods in serial context. 
*/ + void rehash(size_type sz = 0) { + this->reserve(sz); // TODO: add reduction of number of buckets as well + hashcode_type mask = this->my_mask.load(std::memory_order_relaxed); + hashcode_type b = (mask+1)>>1; // size or first index of the last segment + __TBB_ASSERT((b&(b-1))==0, nullptr); // zero or power of 2 + bucket *bp = this->get_bucket( b ); // only the last segment should be scanned for rehashing + for(; b <= mask; b++, bp++ ) { + node_base *n = bp->node_list.load(std::memory_order_relaxed); + __TBB_ASSERT( this->is_valid(n) || n == empty_rehashed || n == rehash_req, "Broken detail structure" ); + __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); + if( n == rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one + hashcode_type h = b; bucket *b_old = bp; + do { + __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); + hashcode_type m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit + b_old = this->get_bucket( h &= m ); + } while( b_old->node_list.load(std::memory_order_relaxed) == rehash_req ); + // now h - is index of the root rehashed bucket b_old + this->mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments + node_base* prev = nullptr; + node_base* curr = b_old->node_list.load(std::memory_order_relaxed); + while (this->is_valid(curr)) { + hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); + + if ((curr_node_hash & mask) != h) { // should be rehashed + node_base* next = curr->next; + // exclude from b_old + if (prev == nullptr) { + b_old->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + bucket *b_new = this->get_bucket(curr_node_hash & mask); + __TBB_ASSERT(b_new->node_list.load(std::memory_order_relaxed) != rehash_req, "hash() function changed for key in table or detail error" ); + this->add_to_bucket(b_new, curr); + curr = next; + } else { + prev = curr; + curr = curr->next; + } + } + } + } + } + + // Clear table + void clear() { + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + this->my_size.store(0, std::memory_order_relaxed); + segment_index_type s = this->segment_index_of( m ); + __TBB_ASSERT( s+1 == this->pointers_per_table || !this->my_table[s+1].load(std::memory_order_relaxed), "wrong mask or concurrent grow" ); + do { + __TBB_ASSERT(this->is_valid(this->my_table[s].load(std::memory_order_relaxed)), "wrong mask or concurrent grow" ); + segment_ptr_type buckets_ptr = this->my_table[s].load(std::memory_order_relaxed); + size_type sz = this->segment_size( s ? s : 1 ); + for( segment_index_type i = 0; i < sz; i++ ) + for( node_base *n = buckets_ptr[i].node_list.load(std::memory_order_relaxed); + this->is_valid(n); n = buckets_ptr[i].node_list.load(std::memory_order_relaxed) ) + { + buckets_ptr[i].node_list.store(n->next, std::memory_order_relaxed); + delete_node( n ); + } + this->delete_segment(s); + } while(s-- > 0); + this->my_mask.store(this->embedded_buckets - 1, std::memory_order_relaxed); + } + + // Clear table and destroy it. 
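// Editorial note (not part of the diff): destroying the map is not thread-safe; make sure no
// other thread is still accessing the table when the destructor runs.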
+ ~concurrent_hash_map() { clear(); } + + //------------------------------------------------------------------------ + // Parallel algorithm support + //------------------------------------------------------------------------ + range_type range( size_type grainsize=1 ) { + return range_type( *this, grainsize ); + } + const_range_type range( size_type grainsize=1 ) const { + return const_range_type( *this, grainsize ); + } + + //------------------------------------------------------------------------ + // STL support - not thread-safe methods + //------------------------------------------------------------------------ + iterator begin() { return iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + const_iterator begin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + const_iterator cbegin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + iterator end() { return iterator( *this, 0, 0, 0 ); } + const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); } + const_iterator cend() const { return const_iterator( *this, 0, 0, 0 ); } + std::pair<iterator, iterator> equal_range( const Key& key ) { return internal_equal_range( key, end() ); } + std::pair<const_iterator, const_iterator> equal_range( const Key& key ) const { return internal_equal_range( key, end() ); } + + // Number of items in table. + size_type size() const { return this->my_size.load(std::memory_order_acquire); } + + // True if size()==0. + __TBB_nodiscard bool empty() const { return size() == 0; } + + // Upper bound on size. + size_type max_size() const { + return allocator_traits_type::max_size(base_type::get_allocator()); + } + + // Returns the current number of buckets + size_type bucket_count() const { return this->my_mask.load(std::memory_order_relaxed) + 1; } + + // return allocator object + allocator_type get_allocator() const { return base_type::get_allocator(); } + + // swap two instances. Iterators are invalidated + void swap(concurrent_hash_map& table) { + using pocs_type = typename node_allocator_traits::propagate_on_container_swap; + using is_equal_type = typename node_allocator_traits::is_always_equal; + swap_allocators(this->my_allocator, table.my_allocator); + internal_swap(table, tbb::detail::disjunction<pocs_type, is_equal_type>()); + } + + //------------------------------------------------------------------------ + // concurrent map operations + //------------------------------------------------------------------------ + + // Return count of items (0 or 1) + size_type count( const Key &key ) const { + return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node ); + } + + // Find item and acquire a read lock on the item. + /** Return true if item is found, false otherwise. */ + bool find( const_accessor &result, const Key &key ) const { + result.release(); + return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, &result, /*write=*/false, &do_not_allocate_node ); + } + + // Find item and acquire a write lock on the item. + /** Return true if item is found, false otherwise. 
*/ + bool find( accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/false, key, nullptr, &result, /*write=*/true, &do_not_allocate_node ); + } + + // Insert item (if not already present) and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/true, key, nullptr, &result, /*write=*/false, &allocate_node_default_construct ); + } + + // Insert item (if not already present) and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/true, key, nullptr, &result, /*write=*/true, &allocate_node_default_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, const value_type &value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, const value_type &value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + bool insert( const value_type &value ) { + return lookup(/*insert*/true, value.first, &value.second, nullptr, /*write=*/false, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, value_type && value ) { + return generic_move_insert(result, std::move(value)); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, value_type && value ) { + return generic_move_insert(result, std::move(value)); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + bool insert( value_type && value ) { + return generic_move_insert(accessor_not_used(), std::move(value)); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + template <typename... Args> + bool emplace( const_accessor &result, Args&&... args ) { + return generic_emplace(result, std::forward<Args>(args)...); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + template <typename... Args> + bool emplace( accessor &result, Args&&... args ) { + return generic_emplace(result, std::forward<Args>(args)...); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + template <typename... Args> + bool emplace( Args&&... 
args ) { + return generic_emplace(accessor_not_used(), std::forward<Args>(args)...); + } + + // Insert range [first, last) + template <typename I> + void insert( I first, I last ) { + for ( ; first != last; ++first ) + insert( *first ); + } + + // Insert initializer list + void insert( std::initializer_list<value_type> il ) { + insert( il.begin(), il.end() ); + } + + // Erase item. + /** Return true if item was erased by particularly this call. */ + bool erase( const Key &key ) { + node_base *erase_node; + hashcode_type const hash = my_hash_compare.hash(key); + hashcode_type mask = this->my_mask.load(std::memory_order_acquire); + restart: + {//lock scope + // get bucket + bucket_accessor b( this, hash & mask ); + search: + node_base* prev = nullptr; + erase_node = b()->node_list.load(std::memory_order_relaxed); + while (this->is_valid(erase_node) && !my_hash_compare.equal(key, static_cast<node*>(erase_node)->value().first ) ) { + prev = erase_node; + erase_node = erase_node->next; + } + + if (erase_node == nullptr) { // not found, but mask could be changed + if (this->check_mask_race(hash, mask)) + goto restart; + return false; + } else if (!b.is_writer() && !b.upgrade_to_writer()) { + if (this->check_mask_race(hash, mask)) // contended upgrade, check mask + goto restart; + goto search; + } + + // remove from container + if (prev == nullptr) { + b()->node_list.store(erase_node->next, std::memory_order_relaxed); + } else { + prev->next = erase_node->next; + } + this->my_size--; + } + { + typename node::scoped_type item_locker( erase_node->mutex, /*write=*/true ); + } + // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor! + delete_node(erase_node); // Only one thread can delete it due to write lock on the bucket + return true; + } + + // Erase item by const_accessor. + /** Return true if item was erased by particularly this call. */ + bool erase( const_accessor& item_accessor ) { + return exclude( item_accessor ); + } + + // Erase item by accessor. + /** Return true if item was erased by particularly this call. */ + bool erase( accessor& item_accessor ) { + return exclude( item_accessor ); + } + +protected: + // Insert or find item and optionally acquire a lock on the item. + bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write, node* (*allocate_node)(bucket_allocator_type&, + const Key&, const T*), node *tmp_n = 0) + { + __TBB_ASSERT( !result || !result->my_node, nullptr ); + bool return_value; + hashcode_type const h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_acquire); + segment_index_type grow_segment = 0; + node *n; + restart: + {//lock scope + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + return_value = false; + // get bucket + bucket_accessor b( this, h & m ); + // find a node + n = search_bucket( key, b() ); + if( op_insert ) { + // [opt] insert a key + if( !n ) { + if( !tmp_n ) { + tmp_n = allocate_node(base_type::get_allocator(), key, t); + } + if( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion + // Rerun search_list, in case another thread inserted the item during the upgrade. + n = search_bucket( key, b() ); + if( this->is_valid(n) ) { // unfortunately, it did + b.downgrade_to_reader(); + goto exists; + } + } + if( this->check_mask_race(h, m) ) + goto restart; // b.release() is done in ~b(). 
+ // insert and set flag to grow the container + grow_segment = this->insert_new_node( b(), n = tmp_n, m ); + tmp_n = 0; + return_value = true; + } + } else { // find or count + if( !n ) { + if( this->check_mask_race( h, m ) ) + goto restart; // b.release() is done in ~b(). TODO: replace by continue + return false; + } + return_value = true; + } + exists: + if( !result ) goto check_growth; + // TODO: the following seems as generic/regular operation + // acquire the item + if( !result->try_acquire( n->mutex, write ) ) { + for( tbb::detail::atomic_backoff backoff(true);; ) { + if( result->try_acquire( n->mutex, write ) ) break; + if( !backoff.bounded_pause() ) { + // the wait takes really long, restart the operation + b.release(); + __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" ); + yield(); + m = this->my_mask.load(std::memory_order_acquire); + goto restart; + } + } + } + }//lock scope + result->my_node = n; + result->my_hash = h; + check_growth: + // [opt] grow the container + if( grow_segment ) { + this->enable_segment( grow_segment ); + } + if( tmp_n ) // if op_insert only + delete_node( tmp_n ); + return return_value; + } + + struct accessor_not_used { void release(){}}; + friend const_accessor* accessor_location( accessor_not_used const& ){ return nullptr;} + friend const_accessor* accessor_location( const_accessor & a ) { return &a;} + + friend bool is_write_access_needed( accessor const& ) { return true;} + friend bool is_write_access_needed( const_accessor const& ) { return false;} + friend bool is_write_access_needed( accessor_not_used const& ) { return false;} + + template <typename Accessor> + bool generic_move_insert( Accessor && result, value_type && value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct ); + } + + template <typename Accessor, typename... Args> + bool generic_emplace( Accessor && result, Args &&... 
args ) { + result.release(); + node * node_ptr = create_node(base_type::get_allocator(), std::forward<Args>(args)...); + return lookup(/*insert*/true, node_ptr->value().first, nullptr, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr ); + } + + // delete item by accessor + bool exclude( const_accessor &item_accessor ) { + __TBB_ASSERT( item_accessor.my_node, nullptr ); + node_base *const exclude_node = item_accessor.my_node; + hashcode_type const hash = item_accessor.my_hash; + hashcode_type mask = this->my_mask.load(std::memory_order_acquire); + do { + // get bucket + bucket_accessor b( this, hash & mask, /*writer=*/true ); + node_base* prev = nullptr; + node_base* curr = b()->node_list.load(std::memory_order_relaxed); + + while (curr && curr != exclude_node) { + prev = curr; + curr = curr->next; + } + + if (curr == nullptr) { // someone else was first + if (this->check_mask_race(hash, mask)) + continue; + item_accessor.release(); + return false; + } + __TBB_ASSERT( curr == exclude_node, nullptr ); + // remove from container + if (prev == nullptr) { + b()->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + + this->my_size--; + break; + } while(true); + if (!item_accessor.is_writer()) { // need to get exclusive lock + item_accessor.upgrade_to_writer(); // return value means nothing here + } + + item_accessor.release(); + delete_node(exclude_node); // Only one thread can delete it + return true; + } + + // Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) + template <typename I> + std::pair<I, I> internal_equal_range( const Key& key, I end_ ) const { + hashcode_type h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + h &= m; + bucket *b = this->get_bucket( h ); + while ( b->node_list.load(std::memory_order_relaxed) == rehash_req ) { + m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit + b = this->get_bucket( h &= m ); + } + node *n = search_bucket( key, b ); + if( !n ) + return std::make_pair(end_, end_); + iterator lower(*this, h, b, n), upper(lower); + return std::make_pair(lower, ++upper); + } + + // Copy "source" to *this, where *this must start out empty. + void internal_copy( const concurrent_hash_map& source ) { + hashcode_type mask = source.my_mask.load(std::memory_order_relaxed); + if( this->my_mask.load(std::memory_order_relaxed) == mask ) { // optimized version + this->reserve(source.my_size.load(std::memory_order_relaxed)); // TODO: load_factor? 
+ bucket *dst = 0, *src = 0; + bool rehash_required = false; + for( hashcode_type k = 0; k <= mask; k++ ) { + if( k & (k-2) ) ++dst,src++; // not the beginning of a segment + else { dst = this->get_bucket( k ); src = source.get_bucket( k ); } + __TBB_ASSERT( dst->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); + node *n = static_cast<node*>( src->node_list.load(std::memory_order_relaxed) ); + if( n == rehash_req ) { // source is not rehashed, items are in previous buckets + rehash_required = true; + dst->node_list.store(rehash_req, std::memory_order_relaxed); + } else for(; n; n = static_cast<node*>( n->next ) ) { + node* node_ptr = create_node(base_type::get_allocator(), n->value().first, n->value().second); + this->add_to_bucket( dst, node_ptr); + this->my_size.fetch_add(1, std::memory_order_relaxed); + } + } + if( rehash_required ) rehash(); + } else internal_copy(source.begin(), source.end(), source.my_size.load(std::memory_order_relaxed)); + } + + template <typename I> + void internal_copy( I first, I last, size_type reserve_size ) { + this->reserve(reserve_size); // TODO: load_factor? + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + for(; first != last; ++first) { + hashcode_type h = my_hash_compare.hash( (*first).first ); + bucket *b = this->get_bucket( h & m ); + __TBB_ASSERT( b->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); + node* node_ptr = create_node(base_type::get_allocator(), (*first).first, (*first).second); + this->add_to_bucket( b, node_ptr ); + ++this->my_size; // TODO: replace by non-atomic op + } + } + + void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type&, + /*is_always_equal=*/std::true_type ) + { + this->internal_move(std::move(other)); + } + + void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type& a, + /*is_always_equal=*/std::false_type ) + { + if (a == other.get_allocator()){ + this->internal_move(std::move(other)); + } else { + try_call( [&] { + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), + other.size()); + }).on_exception( [&] { + this->clear(); + }); + } + } + + void internal_move_assign( concurrent_hash_map&& other, + /*is_always_equal || POCMA = */std::true_type) + { + this->internal_move(std::move(other)); + } + + void internal_move_assign(concurrent_hash_map&& other, /*is_always_equal=*/ std::false_type) { + if (this->my_allocator == other.my_allocator) { + this->internal_move(std::move(other)); + } else { + //do per element move + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), + other.size()); + } + } + + void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::true_type) { + this->internal_swap_content(other); + } + + void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::false_type) { + __TBB_ASSERT(this->my_allocator == other.my_allocator, nullptr); + this->internal_swap_content(other); + } + + // Fast find when no concurrent erasure is used. For internal use inside TBB only! + /** Return pointer to item with given key, or nullptr if no such item exists. + Must not be called concurrently with erasure operations. 
*/ + const_pointer internal_fast_find( const Key& key ) const { + hashcode_type h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_acquire); + node *n; + restart: + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + bucket *b = this->get_bucket( h & m ); + // TODO: actually, notification is unnecessary here, just hiding double-check + if( b->node_list.load(std::memory_order_acquire) == rehash_req ) + { + typename bucket::scoped_type lock; + if( lock.try_acquire( b->mutex, /*write=*/true ) ) { + if( b->node_list.load(std::memory_order_relaxed) == rehash_req) + const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing + } + else lock.acquire( b->mutex, /*write=*/false ); + __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req,nullptr); + } + n = search_bucket( key, b ); + if( n ) + return n->storage(); + else if( this->check_mask_race( h, m ) ) + goto restart; + return 0; + } +}; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename HashCompare = tbb_hash_compare<iterator_key_t<It>>, + typename Alloc = tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<HashCompare>>> +concurrent_hash_map( It, It, HashCompare = HashCompare(), Alloc = Alloc() ) +-> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, HashCompare, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_hash_map( It, It, Alloc ) +-> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, tbb_hash_compare<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, + typename HashCompare = tbb_hash_compare<std::remove_const_t<Key>>, + typename Alloc = tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<HashCompare>>> +concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, HashCompare = HashCompare(), Alloc = Alloc() ) +-> concurrent_hash_map<std::remove_const_t<Key>, T, HashCompare, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_hash_map<std::remove_const_t<Key>, T, tbb_hash_compare<std::remove_const_t<Key>>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +template <typename Key, typename T, typename HashCompare, typename A1, typename A2> +inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) { + if(a.size() != b.size()) return false; + typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end()); + typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end()); + for(; i != i_end; ++i) { + j = b.equal_range(i->first).first; + if( j == j_end || !(i->second == j->second) ) return false; + } + return true; +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Key, typename T, typename HashCompare, typename A1, typename A2> +inline bool operator!=(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) +{ return !(a == b); } +#endif // !__TBB_CPP20_COMPARISONS_PRESENT 
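// Note that operator== above compares element sets, not representation: the two maps
// may use different allocators (A1 and A2) and differently rehashed buckets, yet they
// compare equal as long as every key maps to an equal value. A minimal sketch of how
// this is typically used (illustrative only; assumes <oneapi/tbb/concurrent_hash_map.h>
// and <cassert>, and that no other thread mutates either map during the comparison):
//
//     using map_t = tbb::concurrent_hash_map<std::string, int>;
//     map_t live;
//     {
//         map_t::accessor a;
//         live.insert(a, "answer");
//         a->second = 42;
//     }
//     map_t snapshot(live);        // copy construction performs a deep element copy
//     assert(snapshot == live);    // element-wise comparison defined above
//     snapshot.clear();
//     assert(snapshot != live);    // unequal after clearing the copy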
+ +template <typename Key, typename T, typename HashCompare, typename A> +inline void swap(concurrent_hash_map<Key, T, HashCompare, A> &a, concurrent_hash_map<Key, T, HashCompare, A> &b) +{ a.swap( b ); } + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::split; + using detail::d1::concurrent_hash_map; + using detail::d1::tbb_hash_compare; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_concurrent_hash_map_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h index b83dd5f8c1..d6d0a6d6a4 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h @@ -1,364 +1,364 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_concurrent_lru_cache_H -#define __TBB_concurrent_lru_cache_H - -#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE - #error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h -#endif - -#include "detail/_assert.h" -#include "detail/_aggregator.h" - -#include <map> // for std::map -#include <list> // for std::list -#include <utility> // for std::make_pair -#include <algorithm> // for std::find -#include <atomic> // for std::atomic<bool> - -namespace tbb { - -namespace detail { -namespace d1 { - -//----------------------------------------------------------------------------- -// Concurrent LRU cache -//----------------------------------------------------------------------------- - -template<typename KeyT, typename ValT, typename KeyToValFunctorT = ValT (*) (KeyT)> -class concurrent_lru_cache : no_assign { -// incapsulated helper classes -private: - struct handle_object; - struct storage_map_value_type; - - struct aggregator_operation; - struct retrieve_aggregator_operation; - struct signal_end_of_usage_aggregator_operation; - -// typedefs -public: - using key_type = KeyT; - using value_type = ValT; - using pointer = ValT*; - using reference = ValT&; - using const_pointer = const ValT*; - using const_reference = const ValT&; - - using value_function_type = KeyToValFunctorT; - using handle = handle_object; -private: - using lru_cache_type = concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>; - - using storage_map_type = std::map<key_type, storage_map_value_type>; - using storage_map_iterator_type = typename storage_map_type::iterator; - using storage_map_pointer_type = typename storage_map_type::pointer; - using storage_map_reference_type = typename storage_map_type::reference; - - using history_list_type = std::list<storage_map_iterator_type>; - using history_list_iterator_type = typename history_list_type::iterator; - - using aggregator_operation_type = aggregator_operation; - using aggregator_function_type = aggregating_functor<lru_cache_type, aggregator_operation_type>; - using aggregator_type = aggregator<aggregator_function_type, aggregator_operation_type>; - - friend class 
aggregating_functor<lru_cache_type,aggregator_operation_type>; - -// fields -private: - value_function_type my_value_function; - aggregator_type my_aggregator; - - storage_map_type my_storage_map; // storage map for used objects - history_list_type my_history_list; // history list for unused objects - const std::size_t my_history_list_capacity; // history list's allowed capacity - -// interface -public: - - concurrent_lru_cache(value_function_type value_function, std::size_t cache_capacity) - : my_value_function(value_function), my_history_list_capacity(cache_capacity) { - my_aggregator.initialize_handler(aggregator_function_type(this)); - } - - handle operator[](key_type key) { - retrieve_aggregator_operation op(key); - my_aggregator.execute(&op); - - if (op.is_new_value_needed()) { - op.result().second.my_value = my_value_function(key); - op.result().second.my_is_ready.store(true, std::memory_order_release); - } else { - spin_wait_while_eq(op.result().second.my_is_ready, false); - } - - return handle(*this, op.result()); - } - -private: - - void handle_operations(aggregator_operation* op_list) { - while (op_list) { - op_list->cast_and_handle(*this); - aggregator_operation* prev_op = op_list; - op_list = op_list->next; - - (prev_op->status).store(1, std::memory_order_release); - } - } - - void signal_end_of_usage(storage_map_reference_type map_record_ref) { - signal_end_of_usage_aggregator_operation op(map_record_ref); - my_aggregator.execute(&op); - } - - void signal_end_of_usage_serial(storage_map_reference_type map_record_ref) { - storage_map_iterator_type map_it = my_storage_map.find(map_record_ref.first); - - __TBB_ASSERT(map_it != my_storage_map.end(), - "cache should not return past-end iterators to outer world"); - __TBB_ASSERT(&(*map_it) == &map_record_ref, - "dangling reference has been returned to outside world: data race?"); - __TBB_ASSERT(std::find(my_history_list.begin(), my_history_list.end(), map_it) == my_history_list.end(), - "object in use should not be in list of unused objects "); - - // if it was the last reference, put it to the LRU history - if (! --(map_it->second.my_ref_counter)) { - // if the LRU history is full, evict the oldest items to get space - if (my_history_list.size() >= my_history_list_capacity) { - std::size_t number_of_elements_to_evict = 1 + my_history_list.size() - my_history_list_capacity; - - for (std::size_t i = 0; i < number_of_elements_to_evict; ++i) { - storage_map_iterator_type map_it_to_evict = my_history_list.back(); - - __TBB_ASSERT(map_it_to_evict->second.my_ref_counter == 0, - "item to be evicted should not have a live references"); - - // TODO: can we use forward_list instead of list? pop_front / insert_after last - my_history_list.pop_back(); - my_storage_map.erase(map_it_to_evict); - } - } - - // TODO: can we use forward_list instead of list? 
pop_front / insert_after last - my_history_list.push_front(map_it); - map_it->second.my_history_list_iterator = my_history_list.begin(); - } - } - - storage_map_reference_type retrieve_serial(key_type key, bool& is_new_value_needed) { - storage_map_iterator_type map_it = my_storage_map.find(key); - - if (map_it == my_storage_map.end()) { - map_it = my_storage_map.emplace_hint( - map_it, std::piecewise_construct, std::make_tuple(key), std::make_tuple(value_type(), 0, my_history_list.end(), false)); - is_new_value_needed = true; - } else { - history_list_iterator_type list_it = map_it->second.my_history_list_iterator; - if (list_it != my_history_list.end()) { - __TBB_ASSERT(map_it->second.my_ref_counter == 0, - "item to be evicted should not have a live references"); - - // Item is going to be used. Therefore it is not a subject for eviction, - // so we remove it from LRU history. - my_history_list.erase(list_it); - map_it->second.my_history_list_iterator = my_history_list.end(); - } - } - - ++(map_it->second.my_ref_counter); - return *map_it; - } -}; - -//----------------------------------------------------------------------------- -// Value type for storage map in concurrent LRU cache -//----------------------------------------------------------------------------- - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::storage_map_value_type { -//typedefs -public: - using ref_counter_type = std::size_t; - -// fields -public: - value_type my_value; - ref_counter_type my_ref_counter; - history_list_iterator_type my_history_list_iterator; - std::atomic<bool> my_is_ready; - -// interface -public: - storage_map_value_type( - value_type const& value, ref_counter_type ref_counter, - history_list_iterator_type history_list_iterator, bool is_ready) - : my_value(value), my_ref_counter(ref_counter), - my_history_list_iterator(history_list_iterator), my_is_ready(is_ready) {} -}; - -//----------------------------------------------------------------------------- -// Handle object for operator[] in concurrent LRU cache -//----------------------------------------------------------------------------- - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::handle_object { -// fields -private: - lru_cache_type* my_lru_cache_ptr; - storage_map_pointer_type my_map_record_ptr; - -// interface -public: - handle_object() - : my_lru_cache_ptr(nullptr), my_map_record_ptr(nullptr) {} - handle_object(lru_cache_type& lru_cache_ref, storage_map_reference_type map_record_ref) - : my_lru_cache_ptr(&lru_cache_ref), my_map_record_ptr(&map_record_ref) {} - - handle_object(handle_object&) = delete; - void operator=(handle_object&) = delete; - - handle_object(handle_object&& other) - : my_lru_cache_ptr(other.my_lru_cache_ptr), my_map_record_ptr(other.my_map_record_ptr) { - - __TBB_ASSERT( - bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), - "invalid state of moving object?"); - - other.my_lru_cache_ptr = nullptr; - other.my_map_record_ptr = nullptr; - } - - handle_object& operator=(handle_object&& other) { - __TBB_ASSERT( - bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), - "invalid state of moving object?"); - - if (my_lru_cache_ptr) - my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); - - my_lru_cache_ptr = other.my_lru_cache_ptr; - my_map_record_ptr = other.my_map_record_ptr; - other.my_lru_cache_ptr = nullptr; - other.my_map_record_ptr = 
nullptr; - - return *this; - } - - ~handle_object() { - if (my_lru_cache_ptr) - my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); - } - - operator bool() const { - return (my_lru_cache_ptr && my_map_record_ptr); - } - - value_type& value() { - __TBB_ASSERT(my_lru_cache_ptr, "get value from already moved object?"); - __TBB_ASSERT(my_map_record_ptr, "get value from an invalid or already moved object?"); - - return my_map_record_ptr->second.my_value; - } -}; - -//----------------------------------------------------------------------------- -// Aggregator operation for aggregator type in concurrent LRU cache -//----------------------------------------------------------------------------- - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::aggregator_operation - : aggregated_operation<aggregator_operation> { -// incapsulated helper classes -public: - enum class op_type { retrieve, signal_end_of_usage }; - -// fields -private: - op_type my_op; - -// interface -public: - aggregator_operation(op_type op) : my_op(op) {} - - // TODO: aggregator_operation can be implemented - // - as a statically typed variant type or CRTP? (static, dependent on the use case) - // - or use pointer to function and apply_visitor (dynamic) - // - or use virtual functions (dynamic) - void cast_and_handle(lru_cache_type& lru_cache_ref) { - if (my_op == op_type::retrieve) - static_cast<retrieve_aggregator_operation*>(this)->handle(lru_cache_ref); - else - static_cast<signal_end_of_usage_aggregator_operation*>(this)->handle(lru_cache_ref); - } -}; - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::retrieve_aggregator_operation - : aggregator_operation, private no_assign { -public: - key_type my_key; - storage_map_pointer_type my_map_record_ptr; - bool my_is_new_value_needed; - -public: - retrieve_aggregator_operation(key_type key) - : aggregator_operation(aggregator_operation::op_type::retrieve), - my_key(key), my_is_new_value_needed(false) {} - - void handle(lru_cache_type& lru_cache_ref) { - my_map_record_ptr = &lru_cache_ref.retrieve_serial(my_key, my_is_new_value_needed); - } - - storage_map_reference_type result() { return *my_map_record_ptr; } - - bool is_new_value_needed() { return my_is_new_value_needed; } -}; - -template<typename KeyT, typename ValT, typename KeyToValFunctorT> -struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::signal_end_of_usage_aggregator_operation - : aggregator_operation, private no_assign { - -private: - storage_map_reference_type my_map_record_ref; - -public: - signal_end_of_usage_aggregator_operation(storage_map_reference_type map_record_ref) - : aggregator_operation(aggregator_operation::op_type::signal_end_of_usage), - my_map_record_ref(map_record_ref) {} - - void handle(lru_cache_type& lru_cache_ref) { - lru_cache_ref.signal_end_of_usage_serial(my_map_record_ref); - } -}; - -// TODO: if we have guarantees that KeyToValFunctorT always have -// ValT as a return type and KeyT as an argument type -// we can deduce template parameters of concurrent_lru_cache -// by pattern matching on KeyToValFunctorT - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_lru_cache; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_lru_cache_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may 
not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_lru_cache_H +#define __TBB_concurrent_lru_cache_H + +#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE + #error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h +#endif + +#include "detail/_assert.h" +#include "detail/_aggregator.h" + +#include <map> // for std::map +#include <list> // for std::list +#include <utility> // for std::make_pair +#include <algorithm> // for std::find +#include <atomic> // for std::atomic<bool> + +namespace tbb { + +namespace detail { +namespace d1 { + +//----------------------------------------------------------------------------- +// Concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT = ValT (*) (KeyT)> +class concurrent_lru_cache : no_assign { +// incapsulated helper classes +private: + struct handle_object; + struct storage_map_value_type; + + struct aggregator_operation; + struct retrieve_aggregator_operation; + struct signal_end_of_usage_aggregator_operation; + +// typedefs +public: + using key_type = KeyT; + using value_type = ValT; + using pointer = ValT*; + using reference = ValT&; + using const_pointer = const ValT*; + using const_reference = const ValT&; + + using value_function_type = KeyToValFunctorT; + using handle = handle_object; +private: + using lru_cache_type = concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>; + + using storage_map_type = std::map<key_type, storage_map_value_type>; + using storage_map_iterator_type = typename storage_map_type::iterator; + using storage_map_pointer_type = typename storage_map_type::pointer; + using storage_map_reference_type = typename storage_map_type::reference; + + using history_list_type = std::list<storage_map_iterator_type>; + using history_list_iterator_type = typename history_list_type::iterator; + + using aggregator_operation_type = aggregator_operation; + using aggregator_function_type = aggregating_functor<lru_cache_type, aggregator_operation_type>; + using aggregator_type = aggregator<aggregator_function_type, aggregator_operation_type>; + + friend class aggregating_functor<lru_cache_type,aggregator_operation_type>; + +// fields +private: + value_function_type my_value_function; + aggregator_type my_aggregator; + + storage_map_type my_storage_map; // storage map for used objects + history_list_type my_history_list; // history list for unused objects + const std::size_t my_history_list_capacity; // history list's allowed capacity + +// interface +public: + + concurrent_lru_cache(value_function_type value_function, std::size_t cache_capacity) + : my_value_function(value_function), my_history_list_capacity(cache_capacity) { + my_aggregator.initialize_handler(aggregator_function_type(this)); + } + + handle operator[](key_type key) { + retrieve_aggregator_operation op(key); + my_aggregator.execute(&op); + + if (op.is_new_value_needed()) { + op.result().second.my_value = my_value_function(key); + op.result().second.my_is_ready.store(true, std::memory_order_release); + } else { + 
spin_wait_while_eq(op.result().second.my_is_ready, false); + } + + return handle(*this, op.result()); + } + +private: + + void handle_operations(aggregator_operation* op_list) { + while (op_list) { + op_list->cast_and_handle(*this); + aggregator_operation* prev_op = op_list; + op_list = op_list->next; + + (prev_op->status).store(1, std::memory_order_release); + } + } + + void signal_end_of_usage(storage_map_reference_type map_record_ref) { + signal_end_of_usage_aggregator_operation op(map_record_ref); + my_aggregator.execute(&op); + } + + void signal_end_of_usage_serial(storage_map_reference_type map_record_ref) { + storage_map_iterator_type map_it = my_storage_map.find(map_record_ref.first); + + __TBB_ASSERT(map_it != my_storage_map.end(), + "cache should not return past-end iterators to outer world"); + __TBB_ASSERT(&(*map_it) == &map_record_ref, + "dangling reference has been returned to outside world: data race?"); + __TBB_ASSERT(std::find(my_history_list.begin(), my_history_list.end(), map_it) == my_history_list.end(), + "object in use should not be in list of unused objects "); + + // if it was the last reference, put it to the LRU history + if (! --(map_it->second.my_ref_counter)) { + // if the LRU history is full, evict the oldest items to get space + if (my_history_list.size() >= my_history_list_capacity) { + std::size_t number_of_elements_to_evict = 1 + my_history_list.size() - my_history_list_capacity; + + for (std::size_t i = 0; i < number_of_elements_to_evict; ++i) { + storage_map_iterator_type map_it_to_evict = my_history_list.back(); + + __TBB_ASSERT(map_it_to_evict->second.my_ref_counter == 0, + "item to be evicted should not have a live references"); + + // TODO: can we use forward_list instead of list? pop_front / insert_after last + my_history_list.pop_back(); + my_storage_map.erase(map_it_to_evict); + } + } + + // TODO: can we use forward_list instead of list? pop_front / insert_after last + my_history_list.push_front(map_it); + map_it->second.my_history_list_iterator = my_history_list.begin(); + } + } + + storage_map_reference_type retrieve_serial(key_type key, bool& is_new_value_needed) { + storage_map_iterator_type map_it = my_storage_map.find(key); + + if (map_it == my_storage_map.end()) { + map_it = my_storage_map.emplace_hint( + map_it, std::piecewise_construct, std::make_tuple(key), std::make_tuple(value_type(), 0, my_history_list.end(), false)); + is_new_value_needed = true; + } else { + history_list_iterator_type list_it = map_it->second.my_history_list_iterator; + if (list_it != my_history_list.end()) { + __TBB_ASSERT(map_it->second.my_ref_counter == 0, + "item to be evicted should not have a live references"); + + // Item is going to be used. Therefore it is not a subject for eviction, + // so we remove it from LRU history. 
+ my_history_list.erase(list_it); + map_it->second.my_history_list_iterator = my_history_list.end(); + } + } + + ++(map_it->second.my_ref_counter); + return *map_it; + } +}; + +//----------------------------------------------------------------------------- +// Value type for storage map in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::storage_map_value_type { +//typedefs +public: + using ref_counter_type = std::size_t; + +// fields +public: + value_type my_value; + ref_counter_type my_ref_counter; + history_list_iterator_type my_history_list_iterator; + std::atomic<bool> my_is_ready; + +// interface +public: + storage_map_value_type( + value_type const& value, ref_counter_type ref_counter, + history_list_iterator_type history_list_iterator, bool is_ready) + : my_value(value), my_ref_counter(ref_counter), + my_history_list_iterator(history_list_iterator), my_is_ready(is_ready) {} +}; + +//----------------------------------------------------------------------------- +// Handle object for operator[] in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::handle_object { +// fields +private: + lru_cache_type* my_lru_cache_ptr; + storage_map_pointer_type my_map_record_ptr; + +// interface +public: + handle_object() + : my_lru_cache_ptr(nullptr), my_map_record_ptr(nullptr) {} + handle_object(lru_cache_type& lru_cache_ref, storage_map_reference_type map_record_ref) + : my_lru_cache_ptr(&lru_cache_ref), my_map_record_ptr(&map_record_ref) {} + + handle_object(handle_object&) = delete; + void operator=(handle_object&) = delete; + + handle_object(handle_object&& other) + : my_lru_cache_ptr(other.my_lru_cache_ptr), my_map_record_ptr(other.my_map_record_ptr) { + + __TBB_ASSERT( + bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), + "invalid state of moving object?"); + + other.my_lru_cache_ptr = nullptr; + other.my_map_record_ptr = nullptr; + } + + handle_object& operator=(handle_object&& other) { + __TBB_ASSERT( + bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), + "invalid state of moving object?"); + + if (my_lru_cache_ptr) + my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); + + my_lru_cache_ptr = other.my_lru_cache_ptr; + my_map_record_ptr = other.my_map_record_ptr; + other.my_lru_cache_ptr = nullptr; + other.my_map_record_ptr = nullptr; + + return *this; + } + + ~handle_object() { + if (my_lru_cache_ptr) + my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); + } + + operator bool() const { + return (my_lru_cache_ptr && my_map_record_ptr); + } + + value_type& value() { + __TBB_ASSERT(my_lru_cache_ptr, "get value from already moved object?"); + __TBB_ASSERT(my_map_record_ptr, "get value from an invalid or already moved object?"); + + return my_map_record_ptr->second.my_value; + } +}; + +//----------------------------------------------------------------------------- +// Aggregator operation for aggregator type in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::aggregator_operation + : 
aggregated_operation<aggregator_operation> { +// incapsulated helper classes +public: + enum class op_type { retrieve, signal_end_of_usage }; + +// fields +private: + op_type my_op; + +// interface +public: + aggregator_operation(op_type op) : my_op(op) {} + + // TODO: aggregator_operation can be implemented + // - as a statically typed variant type or CRTP? (static, dependent on the use case) + // - or use pointer to function and apply_visitor (dynamic) + // - or use virtual functions (dynamic) + void cast_and_handle(lru_cache_type& lru_cache_ref) { + if (my_op == op_type::retrieve) + static_cast<retrieve_aggregator_operation*>(this)->handle(lru_cache_ref); + else + static_cast<signal_end_of_usage_aggregator_operation*>(this)->handle(lru_cache_ref); + } +}; + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::retrieve_aggregator_operation + : aggregator_operation, private no_assign { +public: + key_type my_key; + storage_map_pointer_type my_map_record_ptr; + bool my_is_new_value_needed; + +public: + retrieve_aggregator_operation(key_type key) + : aggregator_operation(aggregator_operation::op_type::retrieve), + my_key(key), my_is_new_value_needed(false) {} + + void handle(lru_cache_type& lru_cache_ref) { + my_map_record_ptr = &lru_cache_ref.retrieve_serial(my_key, my_is_new_value_needed); + } + + storage_map_reference_type result() { return *my_map_record_ptr; } + + bool is_new_value_needed() { return my_is_new_value_needed; } +}; + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::signal_end_of_usage_aggregator_operation + : aggregator_operation, private no_assign { + +private: + storage_map_reference_type my_map_record_ref; + +public: + signal_end_of_usage_aggregator_operation(storage_map_reference_type map_record_ref) + : aggregator_operation(aggregator_operation::op_type::signal_end_of_usage), + my_map_record_ref(map_record_ref) {} + + void handle(lru_cache_type& lru_cache_ref) { + lru_cache_ref.signal_end_of_usage_serial(my_map_record_ref); + } +}; + +// TODO: if we have guarantees that KeyToValFunctorT always have +// ValT as a return type and KeyT as an argument type +// we can deduce template parameters of concurrent_lru_cache +// by pattern matching on KeyToValFunctorT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_lru_cache; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_lru_cache_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h index ae389d4f42..3a3ec3e309 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h @@ -1,342 +1,342 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_concurrent_map_H -#define __TBB_concurrent_map_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_skip_list.h" -#include "tbb_allocator.h" -#include <functional> -#include <tuple> -#include <utility> - -namespace tbb { -namespace detail { -namespace d1 { - -template<typename Key, typename Value, typename KeyCompare, typename RandomGenerator, - typename Allocator, bool AllowMultimapping> -struct map_traits { - static constexpr std::size_t max_level = RandomGenerator::max_level; - using random_level_generator_type = RandomGenerator; - using key_type = Key; - using mapped_type = Value; - using compare_type = KeyCompare; - using value_type = std::pair<const key_type, mapped_type>; - using reference = value_type&; - using const_reference = const value_type&; - using allocator_type = Allocator; - - static constexpr bool allow_multimapping = AllowMultimapping; - - class value_compare { - public: - bool operator()(const value_type& lhs, const value_type& rhs) const { - return comp(lhs.first, rhs.first); - } - - protected: - value_compare(compare_type c) : comp(c) {} - - friend struct map_traits; - - compare_type comp; - }; - - static value_compare value_comp(compare_type comp) { return value_compare(comp); } - - static const key_type& get_key(const_reference val) { - return val.first; - } -}; // struct map_traits - -template <typename Key, typename Value, typename Compare, typename Allocator> -class concurrent_multimap; - -template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, Value>>> -class concurrent_map : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { - using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; -public: - using key_type = Key; - using mapped_type = Value; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using key_compare = Compare; - using value_compare = typename base_type::value_compare; - using allocator_type = Allocator; - - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - - using node_type = typename base_type::node_type; - - // Include constructors of base type - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_map() = default; - concurrent_map( const concurrent_map& ) = default; - concurrent_map( const concurrent_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_map( concurrent_map&& ) = default; - concurrent_map( concurrent_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_map& operator=( const concurrent_map& ) = default; - concurrent_map& operator=( concurrent_map&& ) = default; - - // Observers - mapped_type& at(const key_type& key) { - iterator it = this->find(key); - - if (it == this->end()) { - throw_exception(exception_id::invalid_key); - } - return it->second; - } - - const mapped_type& at(const key_type& key) 
const { - return const_cast<concurrent_map*>(this)->at(key); - } - - mapped_type& operator[](const key_type& key) { - iterator it = this->find(key); - - if (it == this->end()) { - it = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; - } - return it->second; - } - - mapped_type& operator[](key_type&& key) { - iterator it = this->find(key); - - if (it == this->end()) { - it = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; - } - return it->second; - } - - using base_type::insert; - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - std::pair<iterator, bool>>::type insert( P&& value ) - { - return this->emplace(std::forward<P>(value)); - } - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - iterator>::type insert( const_iterator hint, P&& value ) - { - return this->emplace_hint(hint, std::forward<P>(value)); - } - - template<typename OtherCompare> - void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } - - template<typename OtherCompare> - void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_map - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Comp = std::less<iterator_key_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_map( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; - -template <typename Key, typename T, - typename Comp = std::less<std::remove_const_t<Key>>, - typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_map( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_map<std::remove_const_t<Key>, T, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_map( It, It, Alloc ) --> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, - std::less<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_map( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_map<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Value, typename Compare, typename Allocator> -void swap( concurrent_map<Key, Value, Compare, Allocator>& lhs, - concurrent_map<Key, Value, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = 
tbb::tbb_allocator<std::pair<const Key, Value>>> -class concurrent_multimap : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { - using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; -public: - using key_type = Key; - using mapped_type = Value; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using key_compare = Compare; - using value_compare = typename base_type::value_compare; - using allocator_type = Allocator; - - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - - using node_type = typename base_type::node_type; - - // Include constructors of base_type - using base_type::base_type; - using base_type::insert; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_multimap() = default; - concurrent_multimap( const concurrent_multimap& ) = default; - concurrent_multimap( const concurrent_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_multimap( concurrent_multimap&& ) = default; - concurrent_multimap( concurrent_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_multimap& operator=( const concurrent_multimap& ) = default; - concurrent_multimap& operator=( concurrent_multimap&& ) = default; - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - std::pair<iterator, bool>>::type insert( P&& value ) - { - return this->emplace(std::forward<P>(value)); - } - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - iterator>::type insert( const_iterator hint, P&& value ) - { - return this->emplace_hint(hint, std::forward<P>(value)); - } - - template<typename OtherCompare> - void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } - - template<typename OtherCompare> - void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_multimap - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Comp = std::less<iterator_key_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_multimap( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; - -template <typename Key, typename T, - typename Comp = std::less<std::remove_const_t<Key>>, - 
typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_multimap<std::remove_const_t<Key>, T, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_multimap( It, It, Alloc ) --> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, - std::less<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_multimap<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; - - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Value, typename Compare, typename Allocator> -void swap( concurrent_multimap<Key, Value, Compare, Allocator>& lhs, - concurrent_multimap<Key, Value, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_map; -using detail::d1::concurrent_multimap; -using detail::split; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_map_H +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_map_H +#define __TBB_concurrent_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_skip_list.h" +#include "tbb_allocator.h" +#include <functional> +#include <tuple> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +template<typename Key, typename Value, typename KeyCompare, typename RandomGenerator, + typename Allocator, bool AllowMultimapping> +struct map_traits { + static constexpr std::size_t max_level = RandomGenerator::max_level; + using random_level_generator_type = RandomGenerator; + using key_type = Key; + using mapped_type = Value; + using compare_type = KeyCompare; + using value_type = std::pair<const key_type, mapped_type>; + using reference = value_type&; + using const_reference = const value_type&; + using allocator_type = Allocator; + + static constexpr bool allow_multimapping = AllowMultimapping; + + class value_compare { + public: + bool operator()(const value_type& lhs, const value_type& rhs) const { + return comp(lhs.first, rhs.first); + } + + protected: + value_compare(compare_type c) : comp(c) {} + + friend struct map_traits; + + compare_type comp; + }; + + static value_compare value_comp(compare_type comp) { return value_compare(comp); } + + static const key_type& get_key(const_reference val) { + return val.first; + } +}; // struct map_traits + +template <typename Key, typename Value, typename Compare, typename Allocator> +class concurrent_multimap; + +template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, Value>>> +class concurrent_map : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { + using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; +public: + using key_type = Key; + using mapped_type = Value; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_map() = default; + concurrent_map( const concurrent_map& ) = default; + concurrent_map( const concurrent_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_map( concurrent_map&& ) = default; + concurrent_map( concurrent_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_map& operator=( const concurrent_map& ) = default; + concurrent_map& operator=( concurrent_map&& ) = default; + + // Observers + mapped_type& at(const key_type& key) { + iterator it = this->find(key); + + if (it == this->end()) { + throw_exception(exception_id::invalid_key); + } + return it->second; + } + + const mapped_type& at(const key_type& key) 
const { + return const_cast<concurrent_map*>(this)->at(key); + } + + mapped_type& operator[](const key_type& key) { + iterator it = this->find(key); + + if (it == this->end()) { + it = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; + } + return it->second; + } + + mapped_type& operator[](key_type&& key) { + iterator it = this->find(key); + + if (it == this->end()) { + it = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; + } + return it->second; + } + + using base_type::insert; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) + { + return this->emplace(std::forward<P>(value)); + } + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) + { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_map + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_map( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; + +template <typename Key, typename T, + typename Comp = std::less<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_map( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_map<std::remove_const_t<Key>, T, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_map( It, It, Alloc ) +-> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, + std::less<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_map<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Value, typename Compare, typename Allocator> +void swap( concurrent_map<Key, Value, Compare, Allocator>& lhs, + concurrent_map<Key, Value, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = 
tbb::tbb_allocator<std::pair<const Key, Value>>> +class concurrent_multimap : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { + using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; +public: + using key_type = Key; + using mapped_type = Value; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type + using base_type::base_type; + using base_type::insert; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_multimap() = default; + concurrent_multimap( const concurrent_multimap& ) = default; + concurrent_multimap( const concurrent_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_multimap( concurrent_multimap&& ) = default; + concurrent_multimap( concurrent_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_multimap& operator=( const concurrent_multimap& ) = default; + concurrent_multimap& operator=( concurrent_multimap&& ) = default; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) + { + return this->emplace(std::forward<P>(value)); + } + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) + { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_multimap + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multimap( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; + +template <typename Key, typename T, + typename Comp = std::less<std::remove_const_t<Key>>, + 
typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multimap<std::remove_const_t<Key>, T, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multimap( It, It, Alloc ) +-> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, + std::less<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_multimap<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; + + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Value, typename Compare, typename Allocator> +void swap( concurrent_multimap<Key, Value, Compare, Allocator>& lhs, + concurrent_multimap<Key, Value, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_map; +using detail::d1::concurrent_multimap; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_map_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h index a281740ad8..0147510af6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h @@ -1,490 +1,490 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_concurrent_priority_queue_H -#define __TBB_concurrent_priority_queue_H - -#include "detail/_namespace_injection.h" -#include "detail/_aggregator.h" -#include "detail/_template_helpers.h" -#include "detail/_allocator_traits.h" -#include "detail/_range_common.h" -#include "detail/_exception.h" -#include "detail/_utils.h" -#include "detail/_containers_helpers.h" -#include "cache_aligned_allocator.h" -#include <vector> -#include <iterator> -#include <functional> -#include <utility> -#include <initializer_list> -#include <type_traits> - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename T, typename Compare = std::less<T>, typename Allocator = cache_aligned_allocator<T>> -class concurrent_priority_queue { -public: - using value_type = T; - using reference = T&; - using const_reference = const T&; - - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - - concurrent_priority_queue() : concurrent_priority_queue(allocator_type{}) {} - - explicit concurrent_priority_queue( const allocator_type& alloc ) - : mark(0), my_size(0), my_compare(), data(alloc) - { - my_aggregator.initialize_handler(functor{this}); - } - - explicit concurrent_priority_queue( const Compare& compare, const allocator_type& alloc = allocator_type() ) - : mark(0), my_size(0), my_compare(compare), data(alloc) - { - my_aggregator.initialize_handler(functor{this}); - } - - explicit concurrent_priority_queue( size_type init_capacity, const allocator_type& alloc = allocator_type() ) - : mark(0), my_size(0), my_compare(), data(alloc) - { - data.reserve(init_capacity); - my_aggregator.initialize_handler(functor{this}); - } - - explicit concurrent_priority_queue( size_type init_capacity, const Compare& compare, const allocator_type& alloc = allocator_type() ) - : mark(0), my_size(0), my_compare(compare), data(alloc) - { - data.reserve(init_capacity); - my_aggregator.initialize_handler(functor{this}); - } - - template <typename InputIterator> - concurrent_priority_queue( InputIterator begin, InputIterator end, const Compare& compare, const allocator_type& alloc = allocator_type() ) - : mark(0), my_compare(compare), data(begin, end, alloc) - { - my_aggregator.initialize_handler(functor{this}); - heapify(); - my_size.store(data.size(), std::memory_order_relaxed); - } - - template <typename InputIterator> - concurrent_priority_queue( InputIterator begin, InputIterator end, const allocator_type& alloc = allocator_type() ) - : concurrent_priority_queue(begin, end, Compare(), alloc) {} - - concurrent_priority_queue( std::initializer_list<value_type> init, const Compare& compare, const allocator_type& alloc = allocator_type() ) - : concurrent_priority_queue(init.begin(), init.end(), compare, alloc) {} - - concurrent_priority_queue( std::initializer_list<value_type> init, const allocator_type& alloc = allocator_type() ) - : concurrent_priority_queue(init, Compare(), alloc) {} - - concurrent_priority_queue( const concurrent_priority_queue& other ) - : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), - data(other.data) - { - my_aggregator.initialize_handler(functor{this}); - } - - concurrent_priority_queue( const concurrent_priority_queue& other, const allocator_type& alloc ) - : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), - data(other.data, alloc) - { - my_aggregator.initialize_handler(functor{this}); - } - - 
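// A minimal usage sketch of this container (illustrative only; assumes
// <oneapi/tbb/concurrent_priority_queue.h> is included and the TBB runtime is linked):
//
//     tbb::concurrent_priority_queue<int> cpq;   // default std::less<int>: largest value first
//     cpq.push(3);
//     cpq.push(7);
//     cpq.emplace(5);
//     int top = 0;
//     while (cpq.try_pop(top)) {
//         // pops 7, then 5, then 3; safe to call concurrently with push/emplace
//     }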
concurrent_priority_queue( concurrent_priority_queue&& other ) - : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), - data(std::move(other.data)) - { - my_aggregator.initialize_handler(functor{this}); - } - - concurrent_priority_queue( concurrent_priority_queue&& other, const allocator_type& alloc ) - : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), - data(std::move(other.data), alloc) - { - my_aggregator.initialize_handler(functor{this}); - } - - concurrent_priority_queue& operator=( const concurrent_priority_queue& other ) { - if (this != &other) { - data = other.data; - mark = other.mark; - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - } - return *this; - } - - concurrent_priority_queue& operator=( concurrent_priority_queue&& other ) { - if (this != &other) { - // TODO: check if exceptions from std::vector::operator=(vector&&) should be handled separately - data = std::move(other.data); - mark = other.mark; - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - } - return *this; - } - - concurrent_priority_queue& operator=( std::initializer_list<value_type> init ) { - assign(init.begin(), init.end()); - return *this; - } - - template <typename InputIterator> - void assign( InputIterator begin, InputIterator end ) { - data.assign(begin, end); - mark = 0; - my_size.store(data.size(), std::memory_order_relaxed); - heapify(); - } - - void assign( std::initializer_list<value_type> init ) { - assign(init.begin(), init.end()); - } - - /* Returned value may not reflect results of pending operations. - This operation reads shared data and will trigger a race condition. */ - __TBB_nodiscard bool empty() const { return size() == 0; } - - // Returns the current number of elements contained in the queue - /* Returned value may not reflect results of pending operations. - This operation reads shared data and will trigger a race condition. */ - size_type size() const { return my_size.load(std::memory_order_relaxed); } - - /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - void push( const value_type& value ) { - cpq_operation op_data(value, PUSH_OP); - my_aggregator.execute(&op_data); - if (op_data.status == FAILED) - throw_exception(exception_id::bad_alloc); - } - - /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - void push( value_type&& value ) { - cpq_operation op_data(value, PUSH_RVALUE_OP); - my_aggregator.execute(&op_data); - if (op_data.status == FAILED) - throw_exception(exception_id::bad_alloc); - } - - /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ - template <typename... Args> - void emplace( Args&&... args ) { - // TODO: support uses allocator construction in this place - push(value_type(std::forward<Args>(args)...)); - } - - // Gets a reference to and removes highest priority element - /* If a highest priority element was found, sets elem and returns true, - otherwise returns false. - This operation can be safely used concurrently with other push, try_pop or emplace operations. 
*/ - bool try_pop( value_type& value ) { - cpq_operation op_data(value, POP_OP); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - // This operation affects the whole container => it is not thread-safe - void clear() { - data.clear(); - mark = 0; - my_size.store(0, std::memory_order_relaxed); - } - - // This operation affects the whole container => it is not thread-safe - void swap( concurrent_priority_queue& other ) { - if (this != &other) { - using std::swap; - swap(data, other.data); - swap(mark, other.mark); - - size_type sz = my_size.load(std::memory_order_relaxed); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_size.store(sz, std::memory_order_relaxed); - } - } - - allocator_type get_allocator() const { return data.get_allocator(); } -private: - enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP}; - enum operation_status {WAIT = 0, SUCCEEDED, FAILED}; - - class cpq_operation : public aggregated_operation<cpq_operation> { - public: - operation_type type; - union { - value_type* elem; - size_type sz; - }; - cpq_operation( const value_type& value, operation_type t ) - : type(t), elem(const_cast<value_type*>(&value)) {} - }; // class cpq_operation - - class functor { - concurrent_priority_queue* my_cpq; - public: - functor() : my_cpq(nullptr) {} - functor( concurrent_priority_queue* cpq ) : my_cpq(cpq) {} - - void operator()(cpq_operation* op_list) { - __TBB_ASSERT(my_cpq != nullptr, "Invalid functor"); - my_cpq->handle_operations(op_list); - } - }; // class functor - - void handle_operations( cpq_operation* op_list ) { - call_itt_notify(acquired, this); - cpq_operation* tmp, *pop_list = nullptr; - __TBB_ASSERT(mark == data.size(), NULL); - - // First pass processes all constant (amortized; reallocation may happen) time pushes and pops. - while(op_list) { - // ITT note: &(op_list->status) tag is used to cover accesses to op_list - // node. This thread is going to handle the operation, and so will acquire it - // and perform the associated operation w/o triggering a race condition; the - // thread that created the operation is waiting on the status field, so when - // this thread is done with the operation, it will perform a - // store_with_release to give control back to the waiting thread in - // aggregator::insert_operation. 
- // TODO: enable - call_itt_notify(acquired, &(op_list->status)); - __TBB_ASSERT(op_list->type != INVALID_OP, NULL); - - tmp = op_list; - op_list = op_list->next.load(std::memory_order_relaxed); - if (tmp->type == POP_OP) { - if (mark < data.size() && - my_compare(data[0], data.back())) - { - // there are newly pushed elems and the last one is higher than top - *(tmp->elem) = std::move(data.back()); - my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); - - data.pop_back(); - __TBB_ASSERT(mark <= data.size(), NULL); - } else { // no convenient item to pop; postpone - tmp->next.store(pop_list, std::memory_order_relaxed); - pop_list = tmp; - } - } else { // PUSH_OP or PUSH_RVALUE_OP - __TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation"); -#if TBB_USE_EXCEPTIONS - try -#endif - { - if (tmp->type == PUSH_OP) { - push_back_helper(*(tmp->elem)); - } else { - data.push_back(std::move(*(tmp->elem))); - } - my_size.store(my_size.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); - } -#if TBB_USE_EXCEPTIONS - catch(...) { - tmp->status.store(uintptr_t(FAILED), std::memory_order_release); - } -#endif - } - } - - // Second pass processes pop operations - while(pop_list) { - tmp = pop_list; - pop_list = pop_list->next.load(std::memory_order_relaxed); - __TBB_ASSERT(tmp->type == POP_OP, NULL); - if (data.empty()) { - tmp->status.store(uintptr_t(FAILED), std::memory_order_release); - } else { - __TBB_ASSERT(mark <= data.size(), NULL); - if (mark < data.size() && - my_compare(data[0], data.back())) - { - // there are newly pushed elems and the last one is higher than top - *(tmp->elem) = std::move(data.back()); - my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); - data.pop_back(); - } else { // extract top and push last element down heap - *(tmp->elem) = std::move(data[0]); - my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); - reheap(); - } - } - } - - // heapify any leftover pushed elements before doing the next - // batch of operations - if (mark < data.size()) heapify(); - __TBB_ASSERT(mark == data.size(), NULL); - call_itt_notify(releasing, this); - } - - // Merge unsorted elements into heap - void heapify() { - if (!mark && data.size() > 0) mark = 1; - for (; mark < data.size(); ++mark) { - // for each unheapified element under size - size_type cur_pos = mark; - value_type to_place = std::move(data[mark]); - do { // push to_place up the heap - size_type parent = (cur_pos - 1) >> 1; - if (!my_compare(data[parent], to_place)) - break; - data[cur_pos] = std::move(data[parent]); - cur_pos = parent; - } while(cur_pos); - data[cur_pos] = std::move(to_place); - } - } - - // Re-heapify after an extraction - // Re-heapify by pushing last element down the heap from the root. 
- void reheap() { - size_type cur_pos = 0, child = 1; - - while(child < mark) { - size_type target = child; - if (child + 1 < mark && my_compare(data[child], data[child + 1])) - ++target; - // target now has the higher priority child - if (my_compare(data[target], data.back())) - break; - data[cur_pos] = std::move(data[target]); - cur_pos = target; - child = (cur_pos << 1) + 1; - } - if (cur_pos != data.size() - 1) - data[cur_pos] = std::move(data.back()); - data.pop_back(); - if (mark > data.size()) mark = data.size(); - } - - void push_back_helper( const T& value ) { - push_back_helper_impl(value, std::is_copy_constructible<T>{}); - } - - void push_back_helper_impl( const T& value, /*is_copy_constructible = */std::true_type ) { - data.push_back(value); - } - - void push_back_helper_impl( const T&, /*is_copy_constructible = */std::false_type ) { - __TBB_ASSERT(false, "error: calling tbb::concurrent_priority_queue.push(const value_type&) for move-only type"); - } - - using aggregator_type = aggregator<functor, cpq_operation>; - - aggregator_type my_aggregator; - // Padding added to avoid false sharing - char padding1[max_nfs_size - sizeof(aggregator_type)]; - // The point at which unsorted elements begin - size_type mark; - std::atomic<size_type> my_size; - Compare my_compare; - - // Padding added to avoid false sharing - char padding2[max_nfs_size - (2*sizeof(size_type)) - sizeof(Compare)]; - //! Storage for the heap of elements in queue, plus unheapified elements - /** data has the following structure: - - binary unheapified - heap elements - ____|_______|____ - | | | - v v v - [_|...|_|_|...|_| |...| ] - 0 ^ ^ ^ - | | |__capacity - | |__my_size - |__mark - - Thus, data stores the binary heap starting at position 0 through - mark-1 (it may be empty). Then there are 0 or more elements - that have not yet been inserted into the heap, in positions - mark through my_size-1. 
*/ - - using vector_type = std::vector<value_type, allocator_type>; - vector_type data; - - friend bool operator==( const concurrent_priority_queue& lhs, - const concurrent_priority_queue& rhs ) - { - return lhs.data == rhs.data; - } - -#if !__TBB_CPP20_COMPARISONS_PRESENT - friend bool operator!=( const concurrent_priority_queue& lhs, - const concurrent_priority_queue& rhs ) - { - return !(lhs == rhs); - } -#endif -}; // class concurrent_priority_queue - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename It, - typename Comp = std::less<iterator_value_t<It>>, - typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_priority_queue( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_priority_queue<iterator_value_t<It>, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_priority_queue( It, It, Alloc ) --> concurrent_priority_queue<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; - -template <typename T, - typename Comp = std::less<T>, - typename Alloc = tbb::cache_aligned_allocator<T>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_priority_queue( std::initializer_list<T>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_priority_queue<T, Comp, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_priority_queue( std::initializer_list<T>, Alloc ) --> concurrent_priority_queue<T, std::less<T>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename T, typename Compare, typename Allocator> -void swap( concurrent_priority_queue<T, Compare, Allocator>& lhs, - concurrent_priority_queue<T, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail -inline namespace v1 { -using detail::d1::concurrent_priority_queue; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_priority_queue_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_priority_queue_H +#define __TBB_concurrent_priority_queue_H + +#include "detail/_namespace_injection.h" +#include "detail/_aggregator.h" +#include "detail/_template_helpers.h" +#include "detail/_allocator_traits.h" +#include "detail/_range_common.h" +#include "detail/_exception.h" +#include "detail/_utils.h" +#include "detail/_containers_helpers.h" +#include "cache_aligned_allocator.h" +#include <vector> +#include <iterator> +#include <functional> +#include <utility> +#include <initializer_list> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename T, typename Compare = std::less<T>, typename Allocator = cache_aligned_allocator<T>> +class concurrent_priority_queue { +public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + + concurrent_priority_queue() : concurrent_priority_queue(allocator_type{}) {} + + explicit concurrent_priority_queue( const allocator_type& alloc ) + : mark(0), my_size(0), my_compare(), data(alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(compare), data(alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( size_type init_capacity, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(), data(alloc) + { + data.reserve(init_capacity); + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( size_type init_capacity, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(compare), data(alloc) + { + data.reserve(init_capacity); + my_aggregator.initialize_handler(functor{this}); + } + + template <typename InputIterator> + concurrent_priority_queue( InputIterator begin, InputIterator end, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_compare(compare), data(begin, end, alloc) + { + my_aggregator.initialize_handler(functor{this}); + heapify(); + my_size.store(data.size(), std::memory_order_relaxed); + } + + template <typename InputIterator> + concurrent_priority_queue( InputIterator begin, InputIterator end, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(begin, end, Compare(), alloc) {} + + concurrent_priority_queue( std::initializer_list<value_type> init, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(init.begin(), init.end(), compare, alloc) {} + + concurrent_priority_queue( std::initializer_list<value_type> init, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(init, Compare(), alloc) {} + + concurrent_priority_queue( const concurrent_priority_queue& other ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(other.data) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue( const concurrent_priority_queue& other, const allocator_type& alloc ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(other.data, alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + 
concurrent_priority_queue( concurrent_priority_queue&& other ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(std::move(other.data)) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue( concurrent_priority_queue&& other, const allocator_type& alloc ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(std::move(other.data), alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue& operator=( const concurrent_priority_queue& other ) { + if (this != &other) { + data = other.data; + mark = other.mark; + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + } + return *this; + } + + concurrent_priority_queue& operator=( concurrent_priority_queue&& other ) { + if (this != &other) { + // TODO: check if exceptions from std::vector::operator=(vector&&) should be handled separately + data = std::move(other.data); + mark = other.mark; + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + } + return *this; + } + + concurrent_priority_queue& operator=( std::initializer_list<value_type> init ) { + assign(init.begin(), init.end()); + return *this; + } + + template <typename InputIterator> + void assign( InputIterator begin, InputIterator end ) { + data.assign(begin, end); + mark = 0; + my_size.store(data.size(), std::memory_order_relaxed); + heapify(); + } + + void assign( std::initializer_list<value_type> init ) { + assign(init.begin(), init.end()); + } + + /* Returned value may not reflect results of pending operations. + This operation reads shared data and will trigger a race condition. */ + __TBB_nodiscard bool empty() const { return size() == 0; } + + // Returns the current number of elements contained in the queue + /* Returned value may not reflect results of pending operations. + This operation reads shared data and will trigger a race condition. */ + size_type size() const { return my_size.load(std::memory_order_relaxed); } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + void push( const value_type& value ) { + cpq_operation op_data(value, PUSH_OP); + my_aggregator.execute(&op_data); + if (op_data.status == FAILED) + throw_exception(exception_id::bad_alloc); + } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + void push( value_type&& value ) { + cpq_operation op_data(value, PUSH_RVALUE_OP); + my_aggregator.execute(&op_data); + if (op_data.status == FAILED) + throw_exception(exception_id::bad_alloc); + } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + template <typename... Args> + void emplace( Args&&... args ) { + // TODO: support uses allocator construction in this place + push(value_type(std::forward<Args>(args)...)); + } + + // Gets a reference to and removes highest priority element + /* If a highest priority element was found, sets elem and returns true, + otherwise returns false. + This operation can be safely used concurrently with other push, try_pop or emplace operations. 
*/ + bool try_pop( value_type& value ) { + cpq_operation op_data(value, POP_OP); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // This operation affects the whole container => it is not thread-safe + void clear() { + data.clear(); + mark = 0; + my_size.store(0, std::memory_order_relaxed); + } + + // This operation affects the whole container => it is not thread-safe + void swap( concurrent_priority_queue& other ) { + if (this != &other) { + using std::swap; + swap(data, other.data); + swap(mark, other.mark); + + size_type sz = my_size.load(std::memory_order_relaxed); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(sz, std::memory_order_relaxed); + } + } + + allocator_type get_allocator() const { return data.get_allocator(); } +private: + enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP}; + enum operation_status {WAIT = 0, SUCCEEDED, FAILED}; + + class cpq_operation : public aggregated_operation<cpq_operation> { + public: + operation_type type; + union { + value_type* elem; + size_type sz; + }; + cpq_operation( const value_type& value, operation_type t ) + : type(t), elem(const_cast<value_type*>(&value)) {} + }; // class cpq_operation + + class functor { + concurrent_priority_queue* my_cpq; + public: + functor() : my_cpq(nullptr) {} + functor( concurrent_priority_queue* cpq ) : my_cpq(cpq) {} + + void operator()(cpq_operation* op_list) { + __TBB_ASSERT(my_cpq != nullptr, "Invalid functor"); + my_cpq->handle_operations(op_list); + } + }; // class functor + + void handle_operations( cpq_operation* op_list ) { + call_itt_notify(acquired, this); + cpq_operation* tmp, *pop_list = nullptr; + __TBB_ASSERT(mark == data.size(), NULL); + + // First pass processes all constant (amortized; reallocation may happen) time pushes and pops. + while(op_list) { + // ITT note: &(op_list->status) tag is used to cover accesses to op_list + // node. This thread is going to handle the operation, and so will acquire it + // and perform the associated operation w/o triggering a race condition; the + // thread that created the operation is waiting on the status field, so when + // this thread is done with the operation, it will perform a + // store_with_release to give control back to the waiting thread in + // aggregator::insert_operation. 
+ // TODO: enable + call_itt_notify(acquired, &(op_list->status)); + __TBB_ASSERT(op_list->type != INVALID_OP, NULL); + + tmp = op_list; + op_list = op_list->next.load(std::memory_order_relaxed); + if (tmp->type == POP_OP) { + if (mark < data.size() && + my_compare(data[0], data.back())) + { + // there are newly pushed elems and the last one is higher than top + *(tmp->elem) = std::move(data.back()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + + data.pop_back(); + __TBB_ASSERT(mark <= data.size(), NULL); + } else { // no convenient item to pop; postpone + tmp->next.store(pop_list, std::memory_order_relaxed); + pop_list = tmp; + } + } else { // PUSH_OP or PUSH_RVALUE_OP + __TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation"); +#if TBB_USE_EXCEPTIONS + try +#endif + { + if (tmp->type == PUSH_OP) { + push_back_helper(*(tmp->elem)); + } else { + data.push_back(std::move(*(tmp->elem))); + } + my_size.store(my_size.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + } +#if TBB_USE_EXCEPTIONS + catch(...) { + tmp->status.store(uintptr_t(FAILED), std::memory_order_release); + } +#endif + } + } + + // Second pass processes pop operations + while(pop_list) { + tmp = pop_list; + pop_list = pop_list->next.load(std::memory_order_relaxed); + __TBB_ASSERT(tmp->type == POP_OP, NULL); + if (data.empty()) { + tmp->status.store(uintptr_t(FAILED), std::memory_order_release); + } else { + __TBB_ASSERT(mark <= data.size(), NULL); + if (mark < data.size() && + my_compare(data[0], data.back())) + { + // there are newly pushed elems and the last one is higher than top + *(tmp->elem) = std::move(data.back()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + data.pop_back(); + } else { // extract top and push last element down heap + *(tmp->elem) = std::move(data[0]); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + reheap(); + } + } + } + + // heapify any leftover pushed elements before doing the next + // batch of operations + if (mark < data.size()) heapify(); + __TBB_ASSERT(mark == data.size(), NULL); + call_itt_notify(releasing, this); + } + + // Merge unsorted elements into heap + void heapify() { + if (!mark && data.size() > 0) mark = 1; + for (; mark < data.size(); ++mark) { + // for each unheapified element under size + size_type cur_pos = mark; + value_type to_place = std::move(data[mark]); + do { // push to_place up the heap + size_type parent = (cur_pos - 1) >> 1; + if (!my_compare(data[parent], to_place)) + break; + data[cur_pos] = std::move(data[parent]); + cur_pos = parent; + } while(cur_pos); + data[cur_pos] = std::move(to_place); + } + } + + // Re-heapify after an extraction + // Re-heapify by pushing last element down the heap from the root. 
+ void reheap() { + size_type cur_pos = 0, child = 1; + + while(child < mark) { + size_type target = child; + if (child + 1 < mark && my_compare(data[child], data[child + 1])) + ++target; + // target now has the higher priority child + if (my_compare(data[target], data.back())) + break; + data[cur_pos] = std::move(data[target]); + cur_pos = target; + child = (cur_pos << 1) + 1; + } + if (cur_pos != data.size() - 1) + data[cur_pos] = std::move(data.back()); + data.pop_back(); + if (mark > data.size()) mark = data.size(); + } + + void push_back_helper( const T& value ) { + push_back_helper_impl(value, std::is_copy_constructible<T>{}); + } + + void push_back_helper_impl( const T& value, /*is_copy_constructible = */std::true_type ) { + data.push_back(value); + } + + void push_back_helper_impl( const T&, /*is_copy_constructible = */std::false_type ) { + __TBB_ASSERT(false, "error: calling tbb::concurrent_priority_queue.push(const value_type&) for move-only type"); + } + + using aggregator_type = aggregator<functor, cpq_operation>; + + aggregator_type my_aggregator; + // Padding added to avoid false sharing + char padding1[max_nfs_size - sizeof(aggregator_type)]; + // The point at which unsorted elements begin + size_type mark; + std::atomic<size_type> my_size; + Compare my_compare; + + // Padding added to avoid false sharing + char padding2[max_nfs_size - (2*sizeof(size_type)) - sizeof(Compare)]; + //! Storage for the heap of elements in queue, plus unheapified elements + /** data has the following structure: + + binary unheapified + heap elements + ____|_______|____ + | | | + v v v + [_|...|_|_|...|_| |...| ] + 0 ^ ^ ^ + | | |__capacity + | |__my_size + |__mark + + Thus, data stores the binary heap starting at position 0 through + mark-1 (it may be empty). Then there are 0 or more elements + that have not yet been inserted into the heap, in positions + mark through my_size-1. 
*/ + + using vector_type = std::vector<value_type, allocator_type>; + vector_type data; + + friend bool operator==( const concurrent_priority_queue& lhs, + const concurrent_priority_queue& rhs ) + { + return lhs.data == rhs.data; + } + +#if !__TBB_CPP20_COMPARISONS_PRESENT + friend bool operator!=( const concurrent_priority_queue& lhs, + const concurrent_priority_queue& rhs ) + { + return !(lhs == rhs); + } +#endif +}; // class concurrent_priority_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_priority_queue( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_priority_queue<iterator_value_t<It>, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_priority_queue( It, It, Alloc ) +-> concurrent_priority_queue<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; + +template <typename T, + typename Comp = std::less<T>, + typename Alloc = tbb::cache_aligned_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_priority_queue( std::initializer_list<T>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_priority_queue<T, Comp, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_priority_queue( std::initializer_list<T>, Alloc ) +-> concurrent_priority_queue<T, std::less<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename T, typename Compare, typename Allocator> +void swap( concurrent_priority_queue<T, Compare, Allocator>& lhs, + concurrent_priority_queue<T, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail +inline namespace v1 { +using detail::d1::concurrent_priority_queue; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_priority_queue_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h index c8ae7afff7..7b4f2fb766 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h @@ -1,592 +1,592 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
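The concurrent_priority_queue defined above allows push(), emplace() and try_pop() to be called concurrently with one another, while size(), empty(), clear(), assignment and swap() are whole-container operations that must not race with modifiers. A minimal usage sketch, illustrative only and not part of the diffed header (tbb::parallel_for is used merely to generate concurrent pushes):

#include <oneapi/tbb/concurrent_priority_queue.h>
#include <oneapi/tbb/parallel_for.h>
#include <cstdio>

int main() {
    tbb::concurrent_priority_queue<int> cpq;   // std::less<int>: larger values pop first

    // push/emplace/try_pop are safe to call concurrently with one another.
    tbb::parallel_for(0, 1000, [&](int i) { cpq.push(i); });

    int top = 0;
    if (cpq.try_pop(top))                      // highest-priority element, here 999
        std::printf("top = %d\n", top);

    // size()/empty()/clear() are not synchronized with concurrent modifiers.
    std::printf("remaining = %zu\n", cpq.size());

    // CTAD via the guides above deduces concurrent_priority_queue<int>.
    tbb::concurrent_priority_queue from_list{3, 1, 2};
    from_list.push(4);
    return 0;
}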
-*/ - -#ifndef __TBB_concurrent_queue_H -#define __TBB_concurrent_queue_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_queue_base.h" -#include "detail/_allocator_traits.h" -#include "detail/_exception.h" -#include "detail/_containers_helpers.h" -#include "cache_aligned_allocator.h" - -namespace tbb { -namespace detail { -namespace d1 { - -// A high-performance thread-safe non-blocking concurrent queue. -// Multiple threads may each push and pop concurrently. -// Assignment construction is not allowed. -template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> -class concurrent_queue { - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; - using queue_representation_type = concurrent_queue_rep<T, Allocator>; - using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; - using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; -public: - using size_type = std::size_t; - using value_type = T; - using reference = T&; - using const_reference = const T&; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using iterator = concurrent_queue_iterator<concurrent_queue, T, Allocator>; - using const_iterator = concurrent_queue_iterator<concurrent_queue, const T, Allocator>; - - concurrent_queue() : concurrent_queue(allocator_type()) {} - - explicit concurrent_queue(const allocator_type& a) : - my_allocator(a), my_queue_representation(nullptr) - { - my_queue_representation = static_cast<queue_representation_type*>(r1::cache_aligned_allocate(sizeof(queue_representation_type))); - queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); - - __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); - } - - template <typename InputIterator> - concurrent_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : - concurrent_queue(a) - { - for (; begin != end; ++begin) - push(*begin); - } - - concurrent_queue(const concurrent_queue& src, const allocator_type& a) : - concurrent_queue(a) - { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); - } - - concurrent_queue(const concurrent_queue& src) : - concurrent_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) - { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); - } - - // Move constructors - concurrent_queue(concurrent_queue&& src) : - concurrent_queue(std::move(src.my_allocator)) - { - internal_swap(src); - } - - concurrent_queue(concurrent_queue&& src, const allocator_type& a) : - concurrent_queue(a) - { - // checking that memory allocated by one instance of allocator can be deallocated - // with another - if (my_allocator == src.my_allocator) { - internal_swap(src); - } else { - // allocators are different => performing per-element move - my_queue_representation->assign(*src.my_queue_representation, move_construct_item); - src.clear(); - } - } - - // Destroy 
queue - ~concurrent_queue() { - clear(); - my_queue_representation->clear(); - queue_allocator_traits::destroy(my_allocator, my_queue_representation); - r1::cache_aligned_deallocate(my_queue_representation); - } - - // Enqueue an item at tail of queue. - void push(const T& value) { - internal_push(value); - } - - void push(T&& value) { - internal_push(std::move(value)); - } - - template <typename... Args> - void emplace( Args&&... args ) { - internal_push(std::forward<Args>(args)...); - } - - // Attempt to dequeue an item from head of queue. - /** Does not wait for item to become available. - Returns true if successful; false otherwise. */ - bool try_pop( T& result ) { - return internal_try_pop(&result); - } - - // Return the number of items in the queue; thread unsafe - size_type unsafe_size() const { - std::ptrdiff_t size = my_queue_representation->size(); - return size < 0 ? 0 : size_type(size); - } - - // Equivalent to size()==0. - __TBB_nodiscard bool empty() const { - return my_queue_representation->empty(); - } - - // Clear the queue. not thread-safe. - void clear() { - while (!empty()) { - T value; - try_pop(value); - } - } - - // Return allocator object - allocator_type get_allocator() const { return my_allocator; } - - //------------------------------------------------------------------------ - // The iterators are intended only for debugging. They are slow and not thread safe. - //------------------------------------------------------------------------ - - iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } - iterator unsafe_end() { return iterator(); } - const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } - const_iterator unsafe_end() const { return const_iterator(); } - const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } - const_iterator unsafe_cend() const { return const_iterator(); } - -private: - void internal_swap(concurrent_queue& src) { - using std::swap; - swap(my_queue_representation, src.my_queue_representation); - } - - template <typename... Args> - void internal_push( Args&&... args ) { - ticket_type k = my_queue_representation->tail_counter++; - my_queue_representation->choose(k).push(k, *my_queue_representation, std::forward<Args>(args)...); - } - - bool internal_try_pop( void* dst ) { - ticket_type k; - do { - k = my_queue_representation->head_counter.load(std::memory_order_relaxed); - do { - if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - k) <= 0) { - // Queue is empty - return false; - } - - // Queue had item with ticket k when we looked. Attempt to get that item. - // Another thread snatched the item, retry. 
- } while (!my_queue_representation->head_counter.compare_exchange_strong(k, k + 1)); - } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation)); - return true; - } - - template <typename Container, typename Value, typename A> - friend class concurrent_queue_iterator; - - static void copy_construct_item(T* location, const void* src) { - // TODO: use allocator_traits for copy construction - new (location) value_type(*static_cast<const value_type*>(src)); - // queue_allocator_traits::construct(my_allocator, location, *static_cast<const T*>(src)); - } - - static void move_construct_item(T* location, const void* src) { - // TODO: use allocator_traits for move construction - new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); - } - - queue_allocator_type my_allocator; - queue_representation_type* my_queue_representation; -}; // class concurrent_queue - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -// Deduction guide for the constructor from two iterators -template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_queue( It, It, Alloc = Alloc() ) --> concurrent_queue<iterator_value_t<It>, Alloc>; - -#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ - -class concurrent_monitor; - -template <typename FuncType> -class delegated_function : public delegate_base { -public: - delegated_function(FuncType& f) : my_func(f) {} - - bool operator()() const override { - return my_func(); - } - -private: - FuncType &my_func; -}; // class delegated_function - -// The concurrent monitor tags for concurrent_bounded_queue. -static constexpr std::size_t cbq_slots_avail_tag = 0; -static constexpr std::size_t cbq_items_avail_tag = 1; -} // namespace d1 - - -namespace r1 { - class concurrent_monitor; - - std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ); - void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ); - void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ); - void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag - , std::size_t ticket ); - void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, - std::ptrdiff_t target, d1::delegate_base& predicate ); -} // namespace r1 - - -namespace d1 { -// A high-performance thread-safe blocking concurrent bounded queue. -// Supports boundedness and blocking semantics. -// Multiple threads may each push and pop concurrently. -// Assignment construction is not allowed. 
-template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> -class concurrent_bounded_queue { - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; - using queue_representation_type = concurrent_queue_rep<T, Allocator>; - using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; - using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; - - template <typename FuncType> - void internal_wait(r1::concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, FuncType pred) { - delegated_function<FuncType> func(pred); - r1::wait_bounded_queue_monitor(monitors, monitor_tag, target, func); - } -public: - using size_type = std::ptrdiff_t; - using value_type = T; - using reference = T&; - using const_reference = const T&; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using iterator = concurrent_queue_iterator<concurrent_bounded_queue, T, Allocator>; - using const_iterator = concurrent_queue_iterator<concurrent_bounded_queue, const T, Allocator> ; - - concurrent_bounded_queue() : concurrent_bounded_queue(allocator_type()) {} - - explicit concurrent_bounded_queue( const allocator_type& a ) : - my_allocator(a), my_capacity(0), my_abort_counter(0), my_queue_representation(nullptr) - { - my_queue_representation = reinterpret_cast<queue_representation_type*>( - r1::allocate_bounded_queue_rep(sizeof(queue_representation_type))); - my_monitors = reinterpret_cast<r1::concurrent_monitor*>(my_queue_representation + 1); - queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); - my_capacity = std::size_t(-1) / (queue_representation_type::item_size > 1 ? 
queue_representation_type::item_size : 2); - - __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); - __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); - } - - template <typename InputIterator> - concurrent_bounded_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type() ) : - concurrent_bounded_queue(a) - { - for (; begin != end; ++begin) - push(*begin); - } - - concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a ) : - concurrent_bounded_queue(a) - { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); - } - - concurrent_bounded_queue( const concurrent_bounded_queue& src ) : - concurrent_bounded_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) - { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); - } - - // Move constructors - concurrent_bounded_queue( concurrent_bounded_queue&& src ) : - concurrent_bounded_queue(std::move(src.my_allocator)) - { - internal_swap(src); - } - - concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a ) : - concurrent_bounded_queue(a) - { - // checking that memory allocated by one instance of allocator can be deallocated - // with another - if (my_allocator == src.my_allocator) { - internal_swap(src); - } else { - // allocators are different => performing per-element move - my_queue_representation->assign(*src.my_queue_representation, move_construct_item); - src.clear(); - } - } - - // Destroy queue - ~concurrent_bounded_queue() { - clear(); - my_queue_representation->clear(); - queue_allocator_traits::destroy(my_allocator, my_queue_representation); - r1::deallocate_bounded_queue_rep(reinterpret_cast<std::uint8_t*>(my_queue_representation), - sizeof(queue_representation_type)); - } - - // Enqueue an item at tail of queue. - void push( const T& value ) { - internal_push(value); - } - - void push( T&& value ) { - internal_push(std::move(value)); - } - - // Enqueue an item at tail of queue if queue is not already full. - // Does not wait for queue to become not full. - // Returns true if item is pushed; false if queue was already full. - bool try_push( const T& value ) { - return internal_push_if_not_full(value); - } - - bool try_push( T&& value ) { - return internal_push_if_not_full(std::move(value)); - } - - template <typename... Args> - void emplace( Args&&... args ) { - internal_push(std::forward<Args>(args)...); - } - - template <typename... Args> - bool try_emplace( Args&&... args ) { - return internal_push_if_not_full(std::forward<Args>(args)...); - } - - // Attempt to dequeue an item from head of queue. - /** Does not wait for item to become available. - Returns true if successful; false otherwise. */ - bool pop( T& result ) { - return internal_pop(&result); - } - - bool try_pop( T& result ) { - return internal_pop_if_present(&result); - } - - void abort() { - internal_abort(); - } - - // Return the number of items in the queue; thread unsafe - std::ptrdiff_t size() const { - return my_queue_representation->size(); - } - - void set_capacity( size_type new_capacity ) { - std::ptrdiff_t c = new_capacity < 0 ? 
infinite_capacity : new_capacity; - my_capacity = c; - } - - size_type capacity() const { - return my_capacity; - } - - // Equivalent to size()==0. - __TBB_nodiscard bool empty() const { - return my_queue_representation->empty(); - } - - // Clear the queue. not thread-safe. - void clear() { - while (!empty()) { - T value; - try_pop(value); - } - } - - // Return allocator object - allocator_type get_allocator() const { return my_allocator; } - - //------------------------------------------------------------------------ - // The iterators are intended only for debugging. They are slow and not thread safe. - //------------------------------------------------------------------------ - - iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } - iterator unsafe_end() { return iterator(); } - const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } - const_iterator unsafe_end() const { return const_iterator(); } - const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } - const_iterator unsafe_cend() const { return const_iterator(); } - -private: - void internal_swap( concurrent_bounded_queue& src ) { - std::swap(my_queue_representation, src.my_queue_representation); - std::swap(my_monitors, src.my_monitors); - } - - static constexpr std::ptrdiff_t infinite_capacity = std::ptrdiff_t(~size_type(0) / 2); - - template <typename... Args> - void internal_push( Args&&... args ) { - unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); - ticket_type ticket = my_queue_representation->tail_counter++; - std::ptrdiff_t target = ticket - my_capacity; - - if (static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target) { // queue is full - auto pred = [&] { - if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { - throw_exception(exception_id::user_abort); - } - - return static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target; - }; - - try_call( [&] { - internal_wait(my_monitors, cbq_slots_avail_tag, target, pred); - }).on_exception( [&] { - my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation); - }); - - } - __TBB_ASSERT((static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) > target), nullptr); - my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); - r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); - } - - template <typename... Args> - bool internal_push_if_not_full( Args&&... args ) { - ticket_type ticket = my_queue_representation->tail_counter.load(std::memory_order_relaxed); - do { - if (static_cast<std::ptrdiff_t>(ticket - my_queue_representation->head_counter.load(std::memory_order_relaxed)) >= my_capacity) { - // Queue is full - return false; - } - // Queue had empty slot with ticket k when we looked. Attempt to claim that slot. - // Another thread claimed the slot, so retry. 
- } while (!my_queue_representation->tail_counter.compare_exchange_strong(ticket, ticket + 1)); - - my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); - r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); - return true; - } - - bool internal_pop( void* dst ) { - std::ptrdiff_t target; - // This loop is a single pop operation; abort_counter should not be re-read inside - unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); - - do { - target = my_queue_representation->head_counter++; - if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target) { - auto pred = [&] { - if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { - throw_exception(exception_id::user_abort); - } - - return static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target; - }; - - try_call( [&] { - internal_wait(my_monitors, cbq_items_avail_tag, target, pred); - }).on_exception( [&] { - my_queue_representation->head_counter--; - }); - } - __TBB_ASSERT(static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) > target, nullptr); - } while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation)); - - r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, target); - return true; - } - - bool internal_pop_if_present( void* dst ) { - ticket_type ticket; - do { - ticket = my_queue_representation->head_counter.load(std::memory_order_relaxed); - do { - if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty - // Queue is empty - return false; - } - // Queue had item with ticket k when we looked. Attempt to get that item. - // Another thread snatched the item, retry. 
- } while (!my_queue_representation->head_counter.compare_exchange_strong(ticket, ticket + 1)); - } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation)); - - r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); - return true; - } - - void internal_abort() { - ++my_abort_counter; - r1::abort_bounded_queue_monitors(my_monitors); - } - - static void copy_construct_item(T* location, const void* src) { - // TODO: use allocator_traits for copy construction - new (location) value_type(*static_cast<const value_type*>(src)); - } - - static void move_construct_item(T* location, const void* src) { - // TODO: use allocator_traits for move construction - new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); - } - - template <typename Container, typename Value, typename A> - friend class concurrent_queue_iterator; - - queue_allocator_type my_allocator; - std::ptrdiff_t my_capacity; - std::atomic<unsigned> my_abort_counter; - queue_representation_type* my_queue_representation; - - r1::concurrent_monitor* my_monitors; -}; // class concurrent_bounded_queue - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -// Deduction guide for the constructor from two iterators -template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>> -concurrent_bounded_queue( It, It, Alloc = Alloc() ) --> concurrent_bounded_queue<iterator_value_t<It>, Alloc>; - -#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ - -} //namespace d1 -} // namesapce detail - -inline namespace v1 { - -using detail::d1::concurrent_queue; -using detail::d1::concurrent_bounded_queue; -using detail::r1::user_abort; -using detail::r1::bad_last_alloc; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_queue_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_queue_H +#define __TBB_concurrent_queue_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_queue_base.h" +#include "detail/_allocator_traits.h" +#include "detail/_exception.h" +#include "detail/_containers_helpers.h" +#include "cache_aligned_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { + +// A high-performance thread-safe non-blocking concurrent queue. +// Multiple threads may each push and pop concurrently. +// Assignment construction is not allowed. 
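As the comment above notes, concurrent_queue is unbounded and non-blocking: push() always enqueues (throwing only on allocation failure) and try_pop() returns false immediately when no item is available. A short producer/consumer sketch, illustrative only and not part of the diffed header (std::thread stands in for any concurrent callers):

#include <oneapi/tbb/concurrent_queue.h>
#include <thread>
#include <cstdio>

int main() {
    tbb::concurrent_queue<int> q;

    std::thread producer([&] {
        for (int i = 0; i < 100; ++i)
            q.push(i);                 // never blocks; the queue grows as needed
    });
    std::thread consumer([&] {
        int value = 0;
        int popped = 0;
        while (popped < 100)
            if (q.try_pop(value))      // false whenever the queue is momentarily empty
                ++popped;
    });
    producer.join();
    consumer.join();

    // unsafe_size() and the unsafe_* iterators are for debugging only.
    std::printf("size after drain = %zu\n", q.unsafe_size());
    return 0;
}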
+template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_queue { + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + using queue_representation_type = concurrent_queue_rep<T, Allocator>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; + using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; +public: + using size_type = std::size_t; + using value_type = T; + using reference = T&; + using const_reference = const T&; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = concurrent_queue_iterator<concurrent_queue, T, Allocator>; + using const_iterator = concurrent_queue_iterator<concurrent_queue, const T, Allocator>; + + concurrent_queue() : concurrent_queue(allocator_type()) {} + + explicit concurrent_queue(const allocator_type& a) : + my_allocator(a), my_queue_representation(nullptr) + { + my_queue_representation = static_cast<queue_representation_type*>(r1::cache_aligned_allocate(sizeof(queue_representation_type))); + queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); + + __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); + } + + template <typename InputIterator> + concurrent_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : + concurrent_queue(a) + { + for (; begin != end; ++begin) + push(*begin); + } + + concurrent_queue(const concurrent_queue& src, const allocator_type& a) : + concurrent_queue(a) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + concurrent_queue(const concurrent_queue& src) : + concurrent_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + // Move constructors + concurrent_queue(concurrent_queue&& src) : + concurrent_queue(std::move(src.my_allocator)) + { + internal_swap(src); + } + + concurrent_queue(concurrent_queue&& src, const allocator_type& a) : + concurrent_queue(a) + { + // checking that memory allocated by one instance of allocator can be deallocated + // with another + if (my_allocator == src.my_allocator) { + internal_swap(src); + } else { + // allocators are different => performing per-element move + my_queue_representation->assign(*src.my_queue_representation, move_construct_item); + src.clear(); + } + } + + // Destroy queue + ~concurrent_queue() { + clear(); + my_queue_representation->clear(); + queue_allocator_traits::destroy(my_allocator, my_queue_representation); + r1::cache_aligned_deallocate(my_queue_representation); + } + + // Enqueue an item at tail of queue. + void push(const T& value) { + internal_push(value); + } + + void push(T&& value) { + internal_push(std::move(value)); + } + + template <typename... Args> + void emplace( Args&&... 
args ) { + internal_push(std::forward<Args>(args)...); + } + + // Attempt to dequeue an item from head of queue. + /** Does not wait for item to become available. + Returns true if successful; false otherwise. */ + bool try_pop( T& result ) { + return internal_try_pop(&result); + } + + // Return the number of items in the queue; thread unsafe + size_type unsafe_size() const { + std::ptrdiff_t size = my_queue_representation->size(); + return size < 0 ? 0 : size_type(size); + } + + // Equivalent to size()==0. + __TBB_nodiscard bool empty() const { + return my_queue_representation->empty(); + } + + // Clear the queue. not thread-safe. + void clear() { + while (!empty()) { + T value; + try_pop(value); + } + } + + // Return allocator object + allocator_type get_allocator() const { return my_allocator; } + + //------------------------------------------------------------------------ + // The iterators are intended only for debugging. They are slow and not thread safe. + //------------------------------------------------------------------------ + + iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } + iterator unsafe_end() { return iterator(); } + const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_end() const { return const_iterator(); } + const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_cend() const { return const_iterator(); } + +private: + void internal_swap(concurrent_queue& src) { + using std::swap; + swap(my_queue_representation, src.my_queue_representation); + } + + template <typename... Args> + void internal_push( Args&&... args ) { + ticket_type k = my_queue_representation->tail_counter++; + my_queue_representation->choose(k).push(k, *my_queue_representation, std::forward<Args>(args)...); + } + + bool internal_try_pop( void* dst ) { + ticket_type k; + do { + k = my_queue_representation->head_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - k) <= 0) { + // Queue is empty + return false; + } + + // Queue had item with ticket k when we looked. Attempt to get that item. + // Another thread snatched the item, retry. 
+ } while (!my_queue_representation->head_counter.compare_exchange_strong(k, k + 1)); + } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation)); + return true; + } + + template <typename Container, typename Value, typename A> + friend class concurrent_queue_iterator; + + static void copy_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for copy construction + new (location) value_type(*static_cast<const value_type*>(src)); + // queue_allocator_traits::construct(my_allocator, location, *static_cast<const T*>(src)); + } + + static void move_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for move construction + new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); + } + + queue_allocator_type my_allocator; + queue_representation_type* my_queue_representation; +}; // class concurrent_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_queue( It, It, Alloc = Alloc() ) +-> concurrent_queue<iterator_value_t<It>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +class concurrent_monitor; + +template <typename FuncType> +class delegated_function : public delegate_base { +public: + delegated_function(FuncType& f) : my_func(f) {} + + bool operator()() const override { + return my_func(); + } + +private: + FuncType &my_func; +}; // class delegated_function + +// The concurrent monitor tags for concurrent_bounded_queue. +static constexpr std::size_t cbq_slots_avail_tag = 0; +static constexpr std::size_t cbq_items_avail_tag = 1; +} // namespace d1 + + +namespace r1 { + class concurrent_monitor; + + std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ); + void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ); + void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ); + void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag + , std::size_t ticket ); + void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, + std::ptrdiff_t target, d1::delegate_base& predicate ); +} // namespace r1 + + +namespace d1 { +// A high-performance thread-safe blocking concurrent bounded queue. +// Supports boundedness and blocking semantics. +// Multiple threads may each push and pop concurrently. +// Assignment construction is not allowed. 
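A corresponding sketch for the bounded variant defined just below (the capacity, thread structure, and -1 sentinel are illustrative assumptions): set_capacity() bounds the queue, push() blocks while the queue is full, pop() blocks until an item arrives, try_push()/try_pop() are the non-blocking counterparts, and abort() makes threads blocked in push() or pop() throw user_abort.

    // Minimal usage sketch; capacity, thread structure, and the -1 sentinel are illustrative.
    #include <oneapi/tbb/concurrent_queue.h>
    #include <cstdio>
    #include <thread>

    int main() {
        tbb::concurrent_bounded_queue<int> queue;
        queue.set_capacity(4);              // push() now blocks while 4 items are waiting

        std::thread producer([&queue] {
            for (int i = 0; i < 16; ++i)
                queue.push(i);              // blocks whenever the queue is full
            queue.push(-1);                 // sentinel telling the consumer to stop
        });

        std::thread consumer([&queue] {
            int value = 0;
            // pop() blocks until an item is available and returns true on success.
            while (queue.pop(value) && value != -1)
                std::printf("got %d\n", value);
        });

        producer.join();
        consumer.join();
        return 0;
    }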
+template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_bounded_queue { + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + using queue_representation_type = concurrent_queue_rep<T, Allocator>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; + using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; + + template <typename FuncType> + void internal_wait(r1::concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, FuncType pred) { + delegated_function<FuncType> func(pred); + r1::wait_bounded_queue_monitor(monitors, monitor_tag, target, func); + } +public: + using size_type = std::ptrdiff_t; + using value_type = T; + using reference = T&; + using const_reference = const T&; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = concurrent_queue_iterator<concurrent_bounded_queue, T, Allocator>; + using const_iterator = concurrent_queue_iterator<concurrent_bounded_queue, const T, Allocator> ; + + concurrent_bounded_queue() : concurrent_bounded_queue(allocator_type()) {} + + explicit concurrent_bounded_queue( const allocator_type& a ) : + my_allocator(a), my_capacity(0), my_abort_counter(0), my_queue_representation(nullptr) + { + my_queue_representation = reinterpret_cast<queue_representation_type*>( + r1::allocate_bounded_queue_rep(sizeof(queue_representation_type))); + my_monitors = reinterpret_cast<r1::concurrent_monitor*>(my_queue_representation + 1); + queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); + my_capacity = std::size_t(-1) / (queue_representation_type::item_size > 1 ? 
queue_representation_type::item_size : 2); + + __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); + } + + template <typename InputIterator> + concurrent_bounded_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type() ) : + concurrent_bounded_queue(a) + { + for (; begin != end; ++begin) + push(*begin); + } + + concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a ) : + concurrent_bounded_queue(a) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + concurrent_bounded_queue( const concurrent_bounded_queue& src ) : + concurrent_bounded_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + // Move constructors + concurrent_bounded_queue( concurrent_bounded_queue&& src ) : + concurrent_bounded_queue(std::move(src.my_allocator)) + { + internal_swap(src); + } + + concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a ) : + concurrent_bounded_queue(a) + { + // checking that memory allocated by one instance of allocator can be deallocated + // with another + if (my_allocator == src.my_allocator) { + internal_swap(src); + } else { + // allocators are different => performing per-element move + my_queue_representation->assign(*src.my_queue_representation, move_construct_item); + src.clear(); + } + } + + // Destroy queue + ~concurrent_bounded_queue() { + clear(); + my_queue_representation->clear(); + queue_allocator_traits::destroy(my_allocator, my_queue_representation); + r1::deallocate_bounded_queue_rep(reinterpret_cast<std::uint8_t*>(my_queue_representation), + sizeof(queue_representation_type)); + } + + // Enqueue an item at tail of queue. + void push( const T& value ) { + internal_push(value); + } + + void push( T&& value ) { + internal_push(std::move(value)); + } + + // Enqueue an item at tail of queue if queue is not already full. + // Does not wait for queue to become not full. + // Returns true if item is pushed; false if queue was already full. + bool try_push( const T& value ) { + return internal_push_if_not_full(value); + } + + bool try_push( T&& value ) { + return internal_push_if_not_full(std::move(value)); + } + + template <typename... Args> + void emplace( Args&&... args ) { + internal_push(std::forward<Args>(args)...); + } + + template <typename... Args> + bool try_emplace( Args&&... args ) { + return internal_push_if_not_full(std::forward<Args>(args)...); + } + + // Attempt to dequeue an item from head of queue. + /** Does not wait for item to become available. + Returns true if successful; false otherwise. */ + bool pop( T& result ) { + return internal_pop(&result); + } + + bool try_pop( T& result ) { + return internal_pop_if_present(&result); + } + + void abort() { + internal_abort(); + } + + // Return the number of items in the queue; thread unsafe + std::ptrdiff_t size() const { + return my_queue_representation->size(); + } + + void set_capacity( size_type new_capacity ) { + std::ptrdiff_t c = new_capacity < 0 ? 
infinite_capacity : new_capacity; + my_capacity = c; + } + + size_type capacity() const { + return my_capacity; + } + + // Equivalent to size()==0. + __TBB_nodiscard bool empty() const { + return my_queue_representation->empty(); + } + + // Clear the queue. not thread-safe. + void clear() { + while (!empty()) { + T value; + try_pop(value); + } + } + + // Return allocator object + allocator_type get_allocator() const { return my_allocator; } + + //------------------------------------------------------------------------ + // The iterators are intended only for debugging. They are slow and not thread safe. + //------------------------------------------------------------------------ + + iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } + iterator unsafe_end() { return iterator(); } + const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_end() const { return const_iterator(); } + const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_cend() const { return const_iterator(); } + +private: + void internal_swap( concurrent_bounded_queue& src ) { + std::swap(my_queue_representation, src.my_queue_representation); + std::swap(my_monitors, src.my_monitors); + } + + static constexpr std::ptrdiff_t infinite_capacity = std::ptrdiff_t(~size_type(0) / 2); + + template <typename... Args> + void internal_push( Args&&... args ) { + unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); + ticket_type ticket = my_queue_representation->tail_counter++; + std::ptrdiff_t target = ticket - my_capacity; + + if (static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target) { // queue is full + auto pred = [&] { + if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { + throw_exception(exception_id::user_abort); + } + + return static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target; + }; + + try_call( [&] { + internal_wait(my_monitors, cbq_slots_avail_tag, target, pred); + }).on_exception( [&] { + my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation); + }); + + } + __TBB_ASSERT((static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) > target), nullptr); + my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); + r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); + } + + template <typename... Args> + bool internal_push_if_not_full( Args&&... args ) { + ticket_type ticket = my_queue_representation->tail_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(ticket - my_queue_representation->head_counter.load(std::memory_order_relaxed)) >= my_capacity) { + // Queue is full + return false; + } + // Queue had empty slot with ticket k when we looked. Attempt to claim that slot. + // Another thread claimed the slot, so retry. 
+ } while (!my_queue_representation->tail_counter.compare_exchange_strong(ticket, ticket + 1)); + + my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); + r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); + return true; + } + + bool internal_pop( void* dst ) { + std::ptrdiff_t target; + // This loop is a single pop operation; abort_counter should not be re-read inside + unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); + + do { + target = my_queue_representation->head_counter++; + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target) { + auto pred = [&] { + if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { + throw_exception(exception_id::user_abort); + } + + return static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target; + }; + + try_call( [&] { + internal_wait(my_monitors, cbq_items_avail_tag, target, pred); + }).on_exception( [&] { + my_queue_representation->head_counter--; + }); + } + __TBB_ASSERT(static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) > target, nullptr); + } while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation)); + + r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, target); + return true; + } + + bool internal_pop_if_present( void* dst ) { + ticket_type ticket; + do { + ticket = my_queue_representation->head_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty + // Queue is empty + return false; + } + // Queue had item with ticket k when we looked. Attempt to get that item. + // Another thread snatched the item, retry. 
+ } while (!my_queue_representation->head_counter.compare_exchange_strong(ticket, ticket + 1)); + } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation)); + + r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); + return true; + } + + void internal_abort() { + ++my_abort_counter; + r1::abort_bounded_queue_monitors(my_monitors); + } + + static void copy_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for copy construction + new (location) value_type(*static_cast<const value_type*>(src)); + } + + static void move_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for move construction + new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); + } + + template <typename Container, typename Value, typename A> + friend class concurrent_queue_iterator; + + queue_allocator_type my_allocator; + std::ptrdiff_t my_capacity; + std::atomic<unsigned> my_abort_counter; + queue_representation_type* my_queue_representation; + + r1::concurrent_monitor* my_monitors; +}; // class concurrent_bounded_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>> +concurrent_bounded_queue( It, It, Alloc = Alloc() ) +-> concurrent_bounded_queue<iterator_value_t<It>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +} //namespace d1 +} // namesapce detail + +inline namespace v1 { + +using detail::d1::concurrent_queue; +using detail::d1::concurrent_bounded_queue; +using detail::r1::user_abort; +using detail::r1::bad_last_alloc; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_queue_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h index c68fa6c362..6baee7f1e8 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h @@ -1,259 +1,259 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_concurrent_set_H -#define __TBB_concurrent_set_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_skip_list.h" -#include "tbb_allocator.h" -#include <functional> -#include <utility> - -namespace tbb { -namespace detail { -namespace d1 { - -template<typename Key, typename KeyCompare, typename RandomGenerator, typename Allocator, bool AllowMultimapping> -struct set_traits { - static constexpr std::size_t max_level = RandomGenerator::max_level; - using random_level_generator_type = RandomGenerator; - using key_type = Key; - using value_type = key_type; - using compare_type = KeyCompare; - using value_compare = compare_type; - using reference = value_type&; - using const_reference = const value_type&; - using allocator_type = Allocator; - - static constexpr bool allow_multimapping = AllowMultimapping; - - static const key_type& get_key(const_reference val) { - return val; - } - - static value_compare value_comp(compare_type comp) { return comp; } -}; // struct set_traits - -template <typename Key, typename Compare, typename Allocator> -class concurrent_multiset; - -template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> -class concurrent_set : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { - using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; -public: - using key_type = Key; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using key_compare = Compare; - using value_compare = typename base_type::value_compare; - using allocator_type = Allocator; - - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - - using node_type = typename base_type::node_type; - - // Include constructors of base_type - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_set() = default; - concurrent_set( const concurrent_set& ) = default; - concurrent_set( const concurrent_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_set( concurrent_set&& ) = default; - concurrent_set( concurrent_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_set& operator=( const concurrent_set& ) = default; - concurrent_set& operator=( concurrent_set&& ) = default; - - template<typename OtherCompare> - void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_set<key_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } - - template<typename OtherCompare> - void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_set - -#if 
__TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Comp = std::less<iterator_value_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_set( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_set<iterator_value_t<It>, Comp, Alloc>; - -template <typename Key, - typename Comp = std::less<Key>, - typename Alloc = tbb::tbb_allocator<Key>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_set( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_set<Key, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_set( It, It, Alloc ) --> concurrent_set<iterator_value_t<It>, - std::less<iterator_value_t<It>>, Alloc>; - -template <typename Key, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_set( std::initializer_list<Key>, Alloc ) --> concurrent_set<Key, std::less<Key>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Compare, typename Allocator> -void swap( concurrent_set<Key, Compare, Allocator>& lhs, - concurrent_set<Key, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> -class concurrent_multiset : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { - using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; -public: - using key_type = Key; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using key_compare = Compare; - using value_compare = typename base_type::value_compare; - using allocator_type = Allocator; - - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - - using node_type = typename base_type::node_type; - - // Include constructors of base_type; - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_multiset() = default; - concurrent_multiset( const concurrent_multiset& ) = default; - concurrent_multiset( const concurrent_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_multiset( concurrent_multiset&& ) = default; - concurrent_multiset( concurrent_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_multiset& operator=( const concurrent_multiset& ) = default; - concurrent_multiset& operator=( concurrent_multiset&& ) = default; - - template<typename OtherCompare> - void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_set<key_type, OtherCompare, 
Allocator>&& source) { - this->internal_merge(std::move(source)); - } - - template<typename OtherCompare> - void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { - this->internal_merge(source); - } - - template<typename OtherCompare> - void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_multiset - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Comp = std::less<iterator_value_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_multiset( It, It, Comp = Comp(), Alloc = Alloc() ) --> concurrent_multiset<iterator_value_t<It>, Comp, Alloc>; - -template <typename Key, - typename Comp = std::less<Key>, - typename Alloc = tbb::tbb_allocator<Key>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Comp>>> -concurrent_multiset( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) --> concurrent_multiset<Key, Comp, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_multiset( It, It, Alloc ) --> concurrent_multiset<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; - -template <typename Key, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_multiset( std::initializer_list<Key>, Alloc ) --> concurrent_multiset<Key, std::less<Key>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Compare, typename Allocator> -void swap( concurrent_multiset<Key, Compare, Allocator>& lhs, - concurrent_multiset<Key, Compare, Allocator>& rhs ) -{ - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_set; -using detail::d1::concurrent_multiset; -using detail::split; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_set_H +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_set_H +#define __TBB_concurrent_set_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_skip_list.h" +#include "tbb_allocator.h" +#include <functional> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +template<typename Key, typename KeyCompare, typename RandomGenerator, typename Allocator, bool AllowMultimapping> +struct set_traits { + static constexpr std::size_t max_level = RandomGenerator::max_level; + using random_level_generator_type = RandomGenerator; + using key_type = Key; + using value_type = key_type; + using compare_type = KeyCompare; + using value_compare = compare_type; + using reference = value_type&; + using const_reference = const value_type&; + using allocator_type = Allocator; + + static constexpr bool allow_multimapping = AllowMultimapping; + + static const key_type& get_key(const_reference val) { + return val; + } + + static value_compare value_comp(compare_type comp) { return comp; } +}; // struct set_traits + +template <typename Key, typename Compare, typename Allocator> +class concurrent_multiset; + +template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_set : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { + using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; +public: + using key_type = Key; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_set() = default; + concurrent_set( const concurrent_set& ) = default; + concurrent_set( const concurrent_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_set( concurrent_set&& ) = default; + concurrent_set( concurrent_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_set& operator=( const concurrent_set& ) = default; + concurrent_set& operator=( concurrent_set&& ) = default; + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_set + +#if 
__TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_set( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_set<iterator_value_t<It>, Comp, Alloc>; + +template <typename Key, + typename Comp = std::less<Key>, + typename Alloc = tbb::tbb_allocator<Key>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_set( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_set<Key, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_set( It, It, Alloc ) +-> concurrent_set<iterator_value_t<It>, + std::less<iterator_value_t<It>>, Alloc>; + +template <typename Key, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_set( std::initializer_list<Key>, Alloc ) +-> concurrent_set<Key, std::less<Key>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Compare, typename Allocator> +void swap( concurrent_set<Key, Compare, Allocator>& lhs, + concurrent_set<Key, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_multiset : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { + using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; +public: + using key_type = Key; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_multiset() = default; + concurrent_multiset( const concurrent_multiset& ) = default; + concurrent_multiset( const concurrent_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_multiset( concurrent_multiset&& ) = default; + concurrent_multiset( concurrent_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_multiset& operator=( const concurrent_multiset& ) = default; + concurrent_multiset& operator=( concurrent_multiset&& ) = default; + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, 
Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_multiset + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multiset( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multiset<iterator_value_t<It>, Comp, Alloc>; + +template <typename Key, + typename Comp = std::less<Key>, + typename Alloc = tbb::tbb_allocator<Key>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multiset( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multiset<Key, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multiset( It, It, Alloc ) +-> concurrent_multiset<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; + +template <typename Key, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multiset( std::initializer_list<Key>, Alloc ) +-> concurrent_multiset<Key, std::less<Key>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Compare, typename Allocator> +void swap( concurrent_multiset<Key, Compare, Allocator>& lhs, + concurrent_multiset<Key, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_set; +using detail::d1::concurrent_multiset; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_set_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h index 0c9c2cd79c..6e582f25e6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h @@ -1,387 +1,387 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_concurrent_unordered_map_H -#define __TBB_concurrent_unordered_map_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_unordered_base.h" -#include "tbb_allocator.h" -#include <functional> - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> -struct concurrent_unordered_map_traits { - using value_type = std::pair<const Key, T>; - using key_type = Key; - using allocator_type = Allocator; - using hash_compare_type = hash_compare<Key, Hash, KeyEqual>; - static constexpr bool allow_multimapping = AllowMultimapping; - - static constexpr const key_type& get_key( const value_type& value ) { - return value.first; - } -}; // struct concurrent_unordered_map_traits - -template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> -class concurrent_unordered_multimap; - -template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, - typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > -class concurrent_unordered_map - : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>> -{ - using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>; - using base_type = concurrent_unordered_base<traits_type>; -public: - using key_type = typename base_type::key_type; - using mapped_type = T; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using hasher = typename base_type::hasher; - using key_equal = typename base_type::key_equal; - using allocator_type = typename base_type::allocator_type; - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - using local_iterator = typename base_type::local_iterator; - using const_local_iterator = typename base_type::const_local_iterator; - using node_type = typename base_type::node_type; - - // Include constructors of base type - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_unordered_map() = default; - concurrent_unordered_map( const concurrent_unordered_map& ) = default; - concurrent_unordered_map( const concurrent_unordered_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_unordered_map( concurrent_unordered_map&& ) = default; - concurrent_unordered_map( concurrent_unordered_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_unordered_map& operator=( const concurrent_unordered_map& ) = default; - concurrent_unordered_map& operator=( concurrent_unordered_map&& ) = default; - - // Observers - mapped_type& operator[]( const key_type& key ) { - iterator where = this->find(key); - - if (where == this->end()) { - where = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; - } - return where->second; - } - - mapped_type& operator[]( key_type&& key ) { - iterator where = this->find(key); - - if (where == this->end()) { - where = 
this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; - } - return where->second; - } - - mapped_type& at( const key_type& key ) { - iterator where = this->find(key); - - if (where == this->end()) { - throw_exception(exception_id::invalid_key); - } - return where->second; - } - - const mapped_type& at( const key_type& key ) const { - const_iterator where = this->find(key); - - if (where == this->end()) { - throw_exception(exception_id::out_of_range); - } - return where->second; - } - - using base_type::insert; - - template<typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - std::pair<iterator, bool>>::type insert( P&& value ) { - return this->emplace(std::forward<P>(value)); - } - - template<typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - iterator>::type insert( const_iterator hint, P&& value ) { - return this->emplace_hint(hint, std::forward<P>(value)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_unordered_map - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename It, - typename Hash = std::hash<iterator_key_t<It>>, - typename KeyEq = std::equal_to<iterator_key_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_map( It, It, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; - -template <typename Key, typename T, - typename Hash = std::hash<std::remove_const_t<Key>>, - typename KeyEq = std::equal_to<std::remove_const_t<Key>>, - typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, KeyEq, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_map( It, It, std::size_t, Alloc ) --> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, 
- std::hash<iterator_key_t<It>>, - std::equal_to<iterator_key_t<It>>, Alloc>; - -// TODO: investigate if a deduction guide for concurrent_unordered_map(It, It, Alloc) is needed - -template <typename It, typename Hash, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_map( It, It, std::size_t, Hash, Alloc ) --> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, - Hash, std::equal_to<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) --> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -template <typename Key, typename T, typename Hash, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) --> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> -void swap( concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& lhs, - concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& rhs ) { - lhs.swap(rhs); -} - -template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, - typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > -class concurrent_unordered_multimap - : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>> -{ - using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>; - using base_type = concurrent_unordered_base<traits_type>; -public: - using key_type = typename base_type::key_type; - using mapped_type = T; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using hasher = typename base_type::hasher; - using key_equal = typename base_type::key_equal; - using allocator_type = typename base_type::allocator_type; - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - using local_iterator = typename base_type::local_iterator; - using const_local_iterator = typename base_type::const_local_iterator; - using node_type = typename base_type::node_type; - - // Include constructors of base type - using 
base_type::base_type; - using base_type::operator=; - using base_type::insert; - - // Required for implicit deduction guides - concurrent_unordered_multimap() = default; - concurrent_unordered_multimap( const concurrent_unordered_multimap& ) = default; - concurrent_unordered_multimap( const concurrent_unordered_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_unordered_multimap( concurrent_unordered_multimap&& ) = default; - concurrent_unordered_multimap( concurrent_unordered_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_unordered_multimap& operator=( const concurrent_unordered_multimap& ) = default; - concurrent_unordered_multimap& operator=( concurrent_unordered_multimap&& ) = default; - - template <typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - std::pair<iterator, bool>>::type insert( P&& value ) { - return this->emplace(std::forward<P>(value)); - } - - template<typename P> - typename std::enable_if<std::is_constructible<value_type, P&&>::value, - iterator>::type insert( const_iterator hint, P&& value ) { - return this->emplace_hint(hint, std::forward<P&&>(value)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_unordered_multimap - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Hash = std::hash<iterator_key_t<It>>, - typename KeyEq = std::equal_to<iterator_key_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multimap( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; - -template <typename Key, typename T, - typename Hash = std::hash<std::remove_const_t<Key>>, - typename KeyEq = std::equal_to<std::remove_const_t<Key>>, - typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, 
KeyEq, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multimap( It, It, std::size_t, Alloc ) --> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, - std::hash<iterator_key_t<It>>, - std::equal_to<iterator_key_t<It>>, Alloc>; - -template <typename It, typename Hash, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multimap( It, It, std::size_t, Hash, Alloc ) --> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, - std::equal_to<iterator_key_t<It>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) --> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -template <typename Key, typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) --> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -template <typename Key, typename T, typename Hash, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) --> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, - std::equal_to<std::remove_const_t<Key>>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> -void swap( concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& lhs, - concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& rhs ) { - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_unordered_map; -using detail::d1::concurrent_unordered_multimap; -using detail::split; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_unordered_map_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_unordered_map_H +#define __TBB_concurrent_unordered_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_unordered_base.h" +#include "tbb_allocator.h" +#include <functional> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> +struct concurrent_unordered_map_traits { + using value_type = std::pair<const Key, T>; + using key_type = Key; + using allocator_type = Allocator; + using hash_compare_type = hash_compare<Key, Hash, KeyEqual>; + static constexpr bool allow_multimapping = AllowMultimapping; + + static constexpr const key_type& get_key( const value_type& value ) { + return value.first; + } +}; // struct concurrent_unordered_map_traits + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +class concurrent_unordered_multimap; + +template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > +class concurrent_unordered_map + : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>> +{ + using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using mapped_type = T; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_unordered_map() = default; + concurrent_unordered_map( const concurrent_unordered_map& ) = default; + concurrent_unordered_map( const concurrent_unordered_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_map( concurrent_unordered_map&& ) = default; + concurrent_unordered_map( concurrent_unordered_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_map& operator=( const concurrent_unordered_map& ) = default; + concurrent_unordered_map& operator=( concurrent_unordered_map&& ) = default; + + // Observers + mapped_type& operator[]( const key_type& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + where = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; + } + return where->second; + } + + mapped_type& operator[]( key_type&& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + where = 
this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; + } + return where->second; + } + + mapped_type& at( const key_type& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + throw_exception(exception_id::invalid_key); + } + return where->second; + } + + const mapped_type& at( const key_type& key ) const { + const_iterator where = this->find(key); + + if (where == this->end()) { + throw_exception(exception_id::out_of_range); + } + return where->second; + } + + using base_type::insert; + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) { + return this->emplace(std::forward<P>(value)); + } + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_map + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Hash = std::hash<iterator_key_t<It>>, + typename KeyEq = std::equal_to<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( It, It, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; + +template <typename Key, typename T, + typename Hash = std::hash<std::remove_const_t<Key>>, + typename KeyEq = std::equal_to<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( It, It, std::size_t, Alloc ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, 
+ std::hash<iterator_key_t<It>>, + std::equal_to<iterator_key_t<It>>, Alloc>; + +// TODO: investigate if a deduction guide for concurrent_unordered_map(It, It, Alloc) is needed + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, + Hash, std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > +class concurrent_unordered_multimap + : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>> +{ + using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using mapped_type = T; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base type + using 
base_type::base_type; + using base_type::operator=; + using base_type::insert; + + // Required for implicit deduction guides + concurrent_unordered_multimap() = default; + concurrent_unordered_multimap( const concurrent_unordered_multimap& ) = default; + concurrent_unordered_multimap( const concurrent_unordered_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_multimap( concurrent_unordered_multimap&& ) = default; + concurrent_unordered_multimap( concurrent_unordered_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_multimap& operator=( const concurrent_unordered_multimap& ) = default; + concurrent_unordered_multimap& operator=( concurrent_unordered_multimap&& ) = default; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) { + return this->emplace(std::forward<P>(value)); + } + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) { + return this->emplace_hint(hint, std::forward<P&&>(value)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_multimap + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Hash = std::hash<iterator_key_t<It>>, + typename KeyEq = std::equal_to<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; + +template <typename Key, typename T, + typename Hash = std::hash<std::remove_const_t<Key>>, + typename KeyEq = std::equal_to<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, 
KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( It, It, std::size_t, Alloc ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, + std::hash<iterator_key_t<It>>, + std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, + std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_unordered_map; +using detail::d1::concurrent_unordered_multimap; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_unordered_map_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h index ce6175294d..bfe3a9785f 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h @@ -1,306 +1,306 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
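For reference, a minimal sketch of how the concurrent_unordered_map interface restored above is typically used; this example is not part of the diff, the include path merely follows the oneapi/ layout of this tree, and the names word_count and more as well as all literal values are illustrative only.

#include "oneapi/tbb/concurrent_unordered_map.h"
#include <iostream>
#include <string>

int main() {
    tbb::concurrent_unordered_map<std::string, int> word_count;

    word_count["alpha"] = 1;            // operator[] default-constructs the mapped value for a missing key
    word_count.insert({"beta", 2});     // insert() of a value_type; the P&& overload forwards to emplace()
    word_count.emplace("gamma", 3);

    std::cout << word_count.at("beta") << '\n';   // at() throws when the key is absent

    // C++17 deduction guides declared above: Key and T are deduced from the initializer list
    tbb::concurrent_unordered_map more{std::pair<std::string, int>{"delta", 4},
                                       std::pair<std::string, int>{"epsilon", 5}};

    // merge() splices nodes from the source container; elements whose keys
    // already exist in the destination stay behind in 'more'
    word_count.merge(more);
    return 0;
}

As with the other oneTBB unordered containers, insertion and lookup may run concurrently, and merge() follows the usual node-splicing semantics, so keys that would collide in the destination remain in the source.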
-*/ - -#ifndef __TBB_concurrent_unordered_set_H -#define __TBB_concurrent_unordered_set_H - -#include "detail/_namespace_injection.h" -#include "detail/_concurrent_unordered_base.h" -#include "tbb_allocator.h" - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Key, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> -struct concurrent_unordered_set_traits { - using key_type = Key; - using value_type = key_type; - using allocator_type = Allocator; - using hash_compare_type = hash_compare<key_type, Hash, KeyEqual>; - static constexpr bool allow_multimapping = AllowMultimapping; - - static constexpr const key_type& get_key( const value_type& value ) { - return value; - } -}; // class concurrent_unordered_set_traits - -template <typename Key, typename Hash, typename KeyEqual, typename Allocator> -class concurrent_unordered_multiset; - -template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, - typename Allocator = tbb::tbb_allocator<Key>> -class concurrent_unordered_set - : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>> -{ - using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>; - using base_type = concurrent_unordered_base<traits_type>; -public: - using key_type = typename base_type::key_type; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using hasher = typename base_type::hasher; - using key_equal = typename base_type::key_equal; - using allocator_type = typename base_type::allocator_type; - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - using local_iterator = typename base_type::local_iterator; - using const_local_iterator = typename base_type::const_local_iterator; - using node_type = typename base_type::node_type; - - // Include constructors of base_type; - using base_type::base_type; - using base_type::operator=; - // Required for implicit deduction guides - concurrent_unordered_set() = default; - concurrent_unordered_set( const concurrent_unordered_set& ) = default; - concurrent_unordered_set( const concurrent_unordered_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_unordered_set( concurrent_unordered_set&& ) = default; - concurrent_unordered_set( concurrent_unordered_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_unordered_set& operator=( const concurrent_unordered_set& ) = default; - concurrent_unordered_set& operator=( concurrent_unordered_set&& ) = default; - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multiset<key_type, OtherHash, 
OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_unordered_set - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename It, - typename Hash = std::hash<iterator_value_t<It>>, - typename KeyEq = std::equal_to<iterator_value_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_set( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_set<iterator_value_t<It>, Hash, KeyEq, Alloc>; - -template <typename T, - typename Hash = std::hash<T>, - typename KeyEq = std::equal_to<T>, - typename Alloc = tbb::tbb_allocator<T>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_set( std::initializer_list<T>, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_set<T, Hash, KeyEq, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_set( It, It, std::size_t, Alloc ) --> concurrent_unordered_set<iterator_value_t<It>, std::hash<iterator_value_t<It>>, - std::equal_to<iterator_value_t<It>>, Alloc>; - -template <typename It, typename Hash, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_set( It, It, std::size_t, Hash, Alloc ) --> concurrent_unordered_set<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_set( std::initializer_list<T>, std::size_t, Alloc ) --> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_set( std::initializer_list<T>, Alloc ) --> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; - -template <typename T, typename Hash, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_set( std::initializer_list<T>, std::size_t, Hash, Alloc ) --> concurrent_unordered_set<T, Hash, std::equal_to<T>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Hash, typename KeyEqual, typename Allocator> -void swap( concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& lhs, - concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& rhs ) { - lhs.swap(rhs); -} - -template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = 
std::equal_to<Key>, - typename Allocator = tbb::tbb_allocator<Key>> -class concurrent_unordered_multiset - : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>> -{ - using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>; - using base_type = concurrent_unordered_base<traits_type>; -public: - using key_type = typename base_type::key_type; - using value_type = typename base_type::value_type; - using size_type = typename base_type::size_type; - using difference_type = typename base_type::difference_type; - using hasher = typename base_type::hasher; - using key_equal = typename base_type::key_equal; - using allocator_type = typename base_type::allocator_type; - using reference = typename base_type::reference; - using const_reference = typename base_type::const_reference; - using pointer = typename base_type::pointer; - using const_pointer = typename base_type::const_pointer; - using iterator = typename base_type::iterator; - using const_iterator = typename base_type::const_iterator; - using local_iterator = typename base_type::local_iterator; - using const_local_iterator = typename base_type::const_local_iterator; - using node_type = typename base_type::node_type; - - // Include constructors of base_type; - using base_type::base_type; - using base_type::operator=; - - // Required for implicit deduction guides - concurrent_unordered_multiset() = default; - concurrent_unordered_multiset( const concurrent_unordered_multiset& ) = default; - concurrent_unordered_multiset( const concurrent_unordered_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} - concurrent_unordered_multiset( concurrent_unordered_multiset&& ) = default; - concurrent_unordered_multiset( concurrent_unordered_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} - // Required to respect the rule of 5 - concurrent_unordered_multiset& operator=( const concurrent_unordered_multiset& ) = default; - concurrent_unordered_multiset& operator=( concurrent_unordered_multiset&& ) = default; - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { - this->internal_merge(source); - } - - template <typename OtherHash, typename OtherKeyEqual> - void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { - this->internal_merge(std::move(source)); - } -}; // class concurrent_unordered_multiset - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename It, - typename Hash = std::hash<iterator_value_t<It>>, - typename KeyEq = std::equal_to<iterator_value_t<It>>, - typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multiset( It, It, std::size_t = 
{}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_multiset<iterator_value_t<It>, Hash, KeyEq, Alloc>; - -template <typename T, - typename Hash = std::hash<T>, - typename KeyEq = std::equal_to<T>, - typename Alloc = tbb::tbb_allocator<T>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!is_allocator_v<KeyEq>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multiset( std::initializer_list<T>, std::size_t = {}, - Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) --> concurrent_unordered_multiset<T, Hash, KeyEq, Alloc>; - -template <typename It, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multiset( It, It, std::size_t, Alloc ) --> concurrent_unordered_multiset<iterator_value_t<It>, std::hash<iterator_value_t<It>>, - std::equal_to<iterator_value_t<It>>, Alloc>; - -template <typename It, typename Hash, typename Alloc, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multiset( It, It, std::size_t, Hash, Alloc ) --> concurrent_unordered_multiset<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Alloc ) --> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; - -template <typename T, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_unordered_multiset( std::initializer_list<T>, Alloc ) --> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; - -template <typename T, typename Hash, typename Alloc, - typename = std::enable_if_t<is_allocator_v<Alloc>>, - typename = std::enable_if_t<!is_allocator_v<Hash>>, - typename = std::enable_if_t<!std::is_integral_v<Hash>>> -concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Hash, Alloc ) --> concurrent_unordered_multiset<T, Hash, std::equal_to<T>, Alloc>; - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -template <typename Key, typename Hash, typename KeyEqual, typename Allocator> -void swap( concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& lhs, - concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& rhs ) { - lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - -using detail::d1::concurrent_unordered_set; -using detail::d1::concurrent_unordered_multiset; -using detail::split; - -} // inline namespace v1 -} // namespace tbb - -#endif // __TBB_concurrent_unordered_set_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_unordered_set_H +#define __TBB_concurrent_unordered_set_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_unordered_base.h" +#include "tbb_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> +struct concurrent_unordered_set_traits { + using key_type = Key; + using value_type = key_type; + using allocator_type = Allocator; + using hash_compare_type = hash_compare<key_type, Hash, KeyEqual>; + static constexpr bool allow_multimapping = AllowMultimapping; + + static constexpr const key_type& get_key( const value_type& value ) { + return value; + } +}; // class concurrent_unordered_set_traits + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +class concurrent_unordered_multiset; + +template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_unordered_set + : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>> +{ + using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + // Required for implicit deduction guides + concurrent_unordered_set() = default; + concurrent_unordered_set( const concurrent_unordered_set& ) = default; + concurrent_unordered_set( const concurrent_unordered_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_set( concurrent_unordered_set&& ) = default; + concurrent_unordered_set( concurrent_unordered_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_set& operator=( const concurrent_unordered_set& ) = default; + concurrent_unordered_set& operator=( concurrent_unordered_set&& ) = default; + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, 
OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_set + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Hash = std::hash<iterator_value_t<It>>, + typename KeyEq = std::equal_to<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_set<iterator_value_t<It>, Hash, KeyEq, Alloc>; + +template <typename T, + typename Hash = std::hash<T>, + typename KeyEq = std::equal_to<T>, + typename Alloc = tbb::tbb_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_set<T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( It, It, std::size_t, Alloc ) +-> concurrent_unordered_set<iterator_value_t<It>, std::hash<iterator_value_t<It>>, + std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_set<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t, Alloc ) +-> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( std::initializer_list<T>, Alloc ) +-> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_set<T, Hash, std::equal_to<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = 
std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_unordered_multiset + : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>> +{ + using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_unordered_multiset() = default; + concurrent_unordered_multiset( const concurrent_unordered_multiset& ) = default; + concurrent_unordered_multiset( const concurrent_unordered_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_multiset( concurrent_unordered_multiset&& ) = default; + concurrent_unordered_multiset( concurrent_unordered_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_multiset& operator=( const concurrent_unordered_multiset& ) = default; + concurrent_unordered_multiset& operator=( concurrent_unordered_multiset&& ) = default; + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_multiset + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Hash = std::hash<iterator_value_t<It>>, + typename KeyEq = std::equal_to<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( It, It, std::size_t = 
{}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multiset<iterator_value_t<It>, Hash, KeyEq, Alloc>; + +template <typename T, + typename Hash = std::hash<T>, + typename KeyEq = std::equal_to<T>, + typename Alloc = tbb::tbb_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multiset<T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( It, It, std::size_t, Alloc ) +-> concurrent_unordered_multiset<iterator_value_t<It>, std::hash<iterator_value_t<It>>, + std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multiset<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Alloc ) +-> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( std::initializer_list<T>, Alloc ) +-> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multiset<T, Hash, std::equal_to<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_unordered_set; +using detail::d1::concurrent_unordered_multiset; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_unordered_set_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h index 94a22b92c6..00295f0d5d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h @@ -1,1114 +1,1114 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
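Similarly, a short sketch of the concurrent_unordered_set / concurrent_unordered_multiset interface restored above (insert, merge between set and multiset, the non-member swap, and a C++17 deduction guide); it is not part of the diff, and the container names and string values are illustrative only.

#include "oneapi/tbb/concurrent_unordered_set.h"
#include <string>

int main() {
    tbb::concurrent_unordered_set<std::string> unique_tags;
    unique_tags.insert("alpha");
    unique_tags.insert("alpha");      // second insert is a no-op: the set keeps keys unique

    tbb::concurrent_unordered_multiset<std::string> all_tags;
    all_tags.insert("alpha");
    all_tags.insert("beta");
    all_tags.insert("beta");          // the multiset keeps duplicate keys

    // merge() moves nodes whose keys are not already present in 'unique_tags';
    // the remaining duplicates stay behind in 'all_tags'
    unique_tags.merge(all_tags);

    // Deduction guide: the element type is deduced from the initializer list
    tbb::concurrent_unordered_set deduced{std::string("x"), std::string("y")};
    swap(unique_tags, deduced);       // non-member swap defined alongside the containers
    return 0;
}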
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_concurrent_vector_H -#define __TBB_concurrent_vector_H - -#include "detail/_namespace_injection.h" -#include "detail/_utils.h" -#include "detail/_assert.h" -#include "detail/_allocator_traits.h" -#include "detail/_segment_table.h" -#include "detail/_containers_helpers.h" -#include "blocked_range.h" -#include "cache_aligned_allocator.h" - -#include <algorithm> -#include <utility> // std::move_if_noexcept -#include <algorithm> -#if __TBB_CPP20_COMPARISONS_PRESENT -#include <compare> -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Vector, typename Value> -class vector_iterator { - using vector_type = Vector; - -public: - using value_type = Value; - using size_type = typename vector_type::size_type; - using difference_type = typename vector_type::difference_type; - using pointer = value_type*; - using reference = value_type&; - using iterator_category = std::random_access_iterator_tag; - - template <typename Vec, typename Val> - friend vector_iterator<Vec, Val> operator+( typename vector_iterator<Vec, Val>::difference_type, const vector_iterator<Vec, Val>& ); - - template <typename Vec, typename Val1, typename Val2> - friend typename vector_iterator<Vec, Val1>::difference_type operator-( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); - - template <typename Vec, typename Val1, typename Val2> - friend bool operator==( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); - - template <typename Vec, typename Val1, typename Val2> - friend bool operator<( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); - - template <typename Vec, typename Val> - friend class vector_iterator; - - template <typename T, typename Allocator> - friend class concurrent_vector; - -private: - vector_iterator( const vector_type& vector, size_type index, value_type* item = nullptr ) - : my_vector(const_cast<vector_type*>(&vector)), my_index(index), my_item(item) - {} - -public: - vector_iterator() : my_vector(nullptr), my_index(~size_type(0)), my_item(nullptr) - {} - - vector_iterator( const vector_iterator<vector_type, typename vector_type::value_type>& other ) - : my_vector(other.my_vector), my_index(other.my_index), my_item(other.my_item) - {} - - vector_iterator& operator=( const vector_iterator<vector_type, typename vector_type::value_type>& other ) { - my_vector = other.my_vector; - my_index = other.my_index; - my_item = other.my_item; - return *this; - } - - vector_iterator operator+( difference_type offset ) const { - return vector_iterator(*my_vector, my_index + offset); - } - - vector_iterator& operator+=( difference_type offset ) { - my_index += offset; - my_item = nullptr; - return *this; - } - - vector_iterator operator-( difference_type offset ) const { - return vector_iterator(*my_vector, my_index - offset); - } - - vector_iterator& operator-=( difference_type offset ) { - my_index -= offset; - my_item = nullptr; - return *this; - } - - reference operator*() const { - value_type *item = my_item; - if (item == nullptr) { - item = &my_vector->internal_subscript(my_index); - } else { - 
__TBB_ASSERT(item == &my_vector->internal_subscript(my_index), "corrupt cache"); - } - return *item; - } - - pointer operator->() const { return &(operator*()); } - - reference operator[]( difference_type k ) const { - return my_vector->internal_subscript(my_index + k); - } - - vector_iterator& operator++() { - ++my_index; - if (my_item != nullptr) { - if (vector_type::is_first_element_in_segment(my_index)) { - // If the iterator crosses a segment boundary, the pointer become invalid - // as possibly next segment is in another memory location - my_item = nullptr; - } else { - ++my_item; - } - } - return *this; - } - - vector_iterator operator++(int) { - vector_iterator result = *this; - ++(*this); - return result; - } - - vector_iterator& operator--() { - __TBB_ASSERT(my_index > 0, "operator--() applied to iterator already at beginning of concurrent_vector"); - --my_index; - if (my_item != nullptr) { - if (vector_type::is_first_element_in_segment(my_index)) { - // If the iterator crosses a segment boundary, the pointer become invalid - // as possibly next segment is in another memory location - my_item = nullptr; - } else { - --my_item; - } - } - return *this; - } - - vector_iterator operator--(int) { - vector_iterator result = *this; - --(*this); - return result; - } - -private: - // concurrent_vector over which we are iterating. - vector_type* my_vector; - - // Index into the vector - size_type my_index; - - // Caches my_vector *it; - // If my_item == nullptr cached value is not available use internal_subscript(my_index) - mutable value_type* my_item; -}; // class vector_iterator - -template <typename Vector, typename T> -vector_iterator<Vector, T> operator+( typename vector_iterator<Vector, T>::difference_type offset, - const vector_iterator<Vector, T>& v ) -{ - return vector_iterator<Vector, T>(*v.my_vector, v.my_index + offset); -} - -template <typename Vector, typename T, typename U> -typename vector_iterator<Vector, T>::difference_type operator-( const vector_iterator<Vector, T>& i, - const vector_iterator<Vector, U>& j ) -{ - using difference_type = typename vector_iterator<Vector, T>::difference_type; - return static_cast<difference_type>(i.my_index) - static_cast<difference_type>(j.my_index); -} - -template <typename Vector, typename T, typename U> -bool operator==( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return i.my_vector == j.my_vector && i.my_index == j.my_index; -} - -template <typename Vector, typename T, typename U> -bool operator!=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return !(i == j); -} - -template <typename Vector, typename T, typename U> -bool operator<( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return i.my_index < j.my_index; -} - -template <typename Vector, typename T, typename U> -bool operator>( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return j < i; -} - -template <typename Vector, typename T, typename U> -bool operator>=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return !(i < j); -} - -template <typename Vector, typename T, typename U> -bool operator<=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { - return !(j < i); -} - -static constexpr std::size_t embedded_table_num_segments = 3; - -template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> -class concurrent_vector - : private segment_table<T, Allocator, 
concurrent_vector<T, Allocator>, embedded_table_num_segments> -{ - using self_type = concurrent_vector<T, Allocator>; - using base_type = segment_table<T, Allocator, self_type, embedded_table_num_segments>; - - friend class segment_table<T, Allocator, self_type, embedded_table_num_segments>; - - template <typename Iterator> - class generic_range_type : public tbb::blocked_range<Iterator> { - using base_type = tbb::blocked_range<Iterator>; - public: - using value_type = T; - using reference = T&; - using const_reference = const T&; - using iterator = Iterator; - using difference_type = std::ptrdiff_t; - - using base_type::base_type; - - template<typename U> - generic_range_type( const generic_range_type<U>& r) : blocked_range<Iterator>(r.begin(), r.end(), r.grainsize()) {} - generic_range_type( generic_range_type& r, split ) : blocked_range<Iterator>(r, split()) {} - }; // class generic_range_type - - static_assert(std::is_same<T, typename Allocator::value_type>::value, - "value_type of the container must be the same as its allocator's"); - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; - // Segment table for concurrent_vector can be extended - static constexpr bool allow_table_extending = true; - static constexpr bool is_noexcept_assignment = allocator_traits_type::propagate_on_container_move_assignment::value || - allocator_traits_type::is_always_equal::value; - static constexpr bool is_noexcept_swap = allocator_traits_type::propagate_on_container_swap::value || - allocator_traits_type::is_always_equal::value; - -public: - using value_type = T; - using allocator_type = Allocator; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using reference = value_type&; - using const_reference = const value_type&; - - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using iterator = vector_iterator<concurrent_vector, value_type>; - using const_iterator = vector_iterator<concurrent_vector, const value_type>; - using reverse_iterator = std::reverse_iterator<iterator>; - using const_reverse_iterator = std::reverse_iterator<const_iterator>; - - using range_type = generic_range_type<iterator>; - using const_range_type = generic_range_type<const_iterator>; - - concurrent_vector() : concurrent_vector(allocator_type()) {} - - explicit concurrent_vector( const allocator_type& alloc ) noexcept - : base_type(alloc) - {} - - explicit concurrent_vector( size_type count, const value_type& value, - const allocator_type& alloc = allocator_type() ) - : concurrent_vector(alloc) - { - try_call( [&] { - grow_by(count, value); - } ).on_exception( [&] { - base_type::clear(); - }); - } - - explicit concurrent_vector( size_type count, const allocator_type& alloc = allocator_type() ) - : concurrent_vector(alloc) - { - try_call( [&] { - grow_by(count); - } ).on_exception( [&] { - base_type::clear(); - }); - } - - template <typename InputIterator> - concurrent_vector( InputIterator first, InputIterator last, const allocator_type& alloc = allocator_type() ) - : concurrent_vector(alloc) - { - try_call( [&] { - grow_by(first, last); - } ).on_exception( [&] { - base_type::clear(); - }); - } - - concurrent_vector( const concurrent_vector& other ) - : base_type(segment_table_allocator_traits::select_on_container_copy_construction(other.get_allocator())) - { - try_call( [&] { - grow_by(other.begin(), other.end()); - } ).on_exception( [&] { - base_type::clear(); - }); - } - - concurrent_vector( const 
concurrent_vector& other, const allocator_type& alloc ) - : base_type(other, alloc) {} - - concurrent_vector(concurrent_vector&& other) noexcept - : base_type(std::move(other)) - {} - - concurrent_vector( concurrent_vector&& other, const allocator_type& alloc ) - : base_type(std::move(other), alloc) - {} - - concurrent_vector( std::initializer_list<value_type> init, - const allocator_type& alloc = allocator_type() ) - : concurrent_vector(init.begin(), init.end(), alloc) - {} - - ~concurrent_vector() {} - - // Assignment - concurrent_vector& operator=( const concurrent_vector& other ) { - base_type::operator=(other); - return *this; - } - - concurrent_vector& operator=( concurrent_vector&& other ) noexcept(is_noexcept_assignment) { - base_type::operator=(std::move(other)); - return *this; - } - - concurrent_vector& operator=( std::initializer_list<value_type> init ) { - assign(init); - return *this; - } - - void assign( size_type count, const value_type& value ) { - destroy_elements(); - grow_by(count, value); - } - - template <typename InputIterator> - typename std::enable_if<is_input_iterator<InputIterator>::value, void>::type - assign( InputIterator first, InputIterator last ) { - destroy_elements(); - grow_by(first, last); - } - - void assign( std::initializer_list<value_type> init ) { - destroy_elements(); - assign(init.begin(), init.end()); - } - - // Concurrent growth - iterator grow_by( size_type delta ) { - return internal_grow_by_delta(delta); - } - - iterator grow_by( size_type delta, const value_type& value ) { - return internal_grow_by_delta(delta, value); - } - - template <typename ForwardIterator> - typename std::enable_if<is_input_iterator<ForwardIterator>::value, iterator>::type - grow_by( ForwardIterator first, ForwardIterator last ) { - auto delta = std::distance(first, last); - return internal_grow_by_delta(delta, first, last); - } - - iterator grow_by( std::initializer_list<value_type> init ) { - return grow_by(init.begin(), init.end()); - } - - iterator grow_to_at_least( size_type n ) { - return internal_grow_to_at_least(n); - } - iterator grow_to_at_least( size_type n, const value_type& value ) { - return internal_grow_to_at_least(n, value); - } - - iterator push_back( const value_type& item ) { - return internal_emplace_back(item); - } - - iterator push_back( value_type&& item ) { - return internal_emplace_back(std::move(item)); - } - - template <typename... Args> - iterator emplace_back( Args&&... 
args ) { - return internal_emplace_back(std::forward<Args>(args)...); - } - - // Items access - reference operator[]( size_type index ) { - return internal_subscript(index); - } - const_reference operator[]( size_type index ) const { - return internal_subscript(index); - } - - reference at( size_type index ) { - return internal_subscript_with_exceptions(index); - } - const_reference at( size_type index ) const { - return internal_subscript_with_exceptions(index); - } - - // Get range for iterating with parallel algorithms - range_type range( size_t grainsize = 1 ) { - return range_type(begin(), end(), grainsize); - } - - // Get const range for iterating with parallel algorithms - const_range_type range( size_t grainsize = 1 ) const { - return const_range_type(begin(), end(), grainsize); - } - - reference front() { - return internal_subscript(0); - } - - const_reference front() const { - return internal_subscript(0); - } - - reference back() { - return internal_subscript(size() - 1); - } - - const_reference back() const { - return internal_subscript(size() - 1); - } - - // Iterators - iterator begin() { return iterator(*this, 0); } - const_iterator begin() const { return const_iterator(*this, 0); } - const_iterator cbegin() const { return const_iterator(*this, 0); } - - iterator end() { return iterator(*this, size()); } - const_iterator end() const { return const_iterator(*this, size()); } - const_iterator cend() const { return const_iterator(*this, size()); } - - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } - - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } - - allocator_type get_allocator() const { - return base_type::get_allocator(); - } - - // Storage - bool empty() const noexcept { - return 0 == size(); - } - - size_type size() const noexcept { - return std::min(this->my_size.load(std::memory_order_acquire), capacity()); - } - - size_type max_size() const noexcept { - return allocator_traits_type::max_size(base_type::get_allocator()); - } - - size_type capacity() const noexcept { - return base_type::capacity(); - } - - void reserve( size_type n ) { - if (n == 0) return; - - if (n > max_size()) { - tbb::detail::throw_exception(exception_id::reservation_length_error); - } - - this->assign_first_block_if_necessary(this->segment_index_of(n - 1) + 1); - base_type::reserve(n); - } - - void resize( size_type n ) { - internal_resize(n); - } - - void resize( size_type n, const value_type& val ) { - internal_resize(n, val); - } - - void shrink_to_fit() { - internal_compact(); - } - - void swap(concurrent_vector& other) noexcept(is_noexcept_swap) { - base_type::swap(other); - } - - void clear() { - destroy_elements(); - } - -private: - using segment_type = typename base_type::segment_type; - using segment_table_type = typename base_type::segment_table_type; - using segment_table_allocator_traits = typename base_type::segment_table_allocator_traits; - using segment_index_type = typename base_type::segment_index_type; - - using segment_element_type = typename base_type::value_type; - using segment_element_allocator_type = typename allocator_traits_type::template rebind_alloc<segment_element_type>; - using segment_element_allocator_traits = 
tbb::detail::allocator_traits<segment_element_allocator_type>; - - segment_table_type allocate_long_table( const typename base_type::atomic_segment* embedded_table, size_type start_index ) { - __TBB_ASSERT(start_index <= this->embedded_table_size, "Start index out of embedded table"); - - // If other threads are trying to set pointers in the short segment, wait for them to finish their - // assignments before we copy the short segment to the long segment. Note: grow_to_at_least depends on it - for (segment_index_type i = 0; this->segment_base(i) < start_index; ++i) { - spin_wait_while_eq(embedded_table[i], segment_type(nullptr)); - } - - // It is possible that the table was extend by a thread allocating first_block, need to check this. - if (this->get_table() != embedded_table) { - return nullptr; - } - - // Allocate long segment table and fill with null pointers - segment_table_type new_segment_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), this->pointers_per_long_table); - // Copy segment pointers from the embedded table - for (size_type segment_index = 0; segment_index < this->pointers_per_embedded_table; ++segment_index) { - segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], - embedded_table[segment_index].load(std::memory_order_relaxed)); - } - for (size_type segment_index = this->pointers_per_embedded_table; segment_index < this->pointers_per_long_table; ++segment_index) { - segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], nullptr); - } - - return new_segment_table; - } - - // create_segment function is required by the segment_table base class - segment_type create_segment( segment_table_type table, segment_index_type seg_index, size_type index ) { - size_type first_block = this->my_first_block.load(std::memory_order_relaxed); - // First block allocation - if (seg_index < first_block) { - // If 0 segment is already allocated, then it remains to wait until the segments are filled to requested - if (table[0].load(std::memory_order_acquire) != nullptr) { - spin_wait_while_eq(table[seg_index], segment_type(nullptr)); - return nullptr; - } - - segment_element_allocator_type segment_allocator(base_type::get_allocator()); - segment_type new_segment = nullptr; - size_type first_block_size = this->segment_size(first_block); - try_call( [&] { - new_segment = segment_element_allocator_traits::allocate(segment_allocator, first_block_size); - } ).on_exception( [&] { - segment_type disabled_segment = nullptr; - if (table[0].compare_exchange_strong(disabled_segment, this->segment_allocation_failure_tag)) { - size_type end_segment = table == this->my_embedded_table ? this->pointers_per_embedded_table : first_block; - for (size_type i = 1; i < end_segment; ++i) { - table[i].store(this->segment_allocation_failure_tag, std::memory_order_release); - } - } - }); - - segment_type disabled_segment = nullptr; - if (table[0].compare_exchange_strong(disabled_segment, new_segment)) { - this->extend_table_if_necessary(table, 0, first_block_size); - for (size_type i = 1; i < first_block; ++i) { - table[i].store(new_segment, std::memory_order_release); - } - - // Other threads can wait on a snapshot of an embedded table, need to fill it. 
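The create_segment path shown here is what makes unsynchronized growth safe: several threads may race to allocate a segment, but only one allocation is published and the rest are discarded. From the caller's side this is invisible; a minimal standalone usage sketch (assuming oneTBB headers are available and the program links against -ltbb; thread and element counts are arbitrary):

    // Minimal concurrent-growth sketch; not part of this header.
    #include <oneapi/tbb/concurrent_vector.h>
    #include <thread>
    #include <vector>
    #include <cassert>

    int main() {
        tbb::concurrent_vector<int> v;
        std::vector<std::thread> workers;
        for (int t = 0; t < 4; ++t) {
            workers.emplace_back([&v, t] {
                for (int i = 0; i < 1000; ++i) {
                    v.push_back(t * 1000 + i);  // may race into create_segment; only one allocation wins
                }
            });
        }
        for (auto& w : workers) w.join();
        assert(v.size() == 4000);               // every element is stored; interleaving order is unspecified
        return 0;
    }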
- for (size_type i = 1; i < first_block && i < this->pointers_per_embedded_table; ++i) { - this->my_embedded_table[i].store(new_segment, std::memory_order_release); - } - } else if (new_segment != this->segment_allocation_failure_tag) { - // Deallocate the memory - segment_element_allocator_traits::deallocate(segment_allocator, new_segment, first_block_size); - // 0 segment is already allocated, then it remains to wait until the segments are filled to requested - spin_wait_while_eq(table[seg_index], segment_type(nullptr)); - } - } else { - size_type offset = this->segment_base(seg_index); - if (index == offset) { - __TBB_ASSERT(table[seg_index].load(std::memory_order_relaxed) == nullptr, "Only this thread can enable this segment"); - segment_element_allocator_type segment_allocator(base_type::get_allocator()); - segment_type new_segment = this->segment_allocation_failure_tag; - try_call( [&] { - new_segment = segment_element_allocator_traits::allocate(segment_allocator,this->segment_size(seg_index)); - // Shift base address to simplify access by index - new_segment -= this->segment_base(seg_index); - } ).on_completion( [&] { - table[seg_index].store(new_segment, std::memory_order_release); - }); - } else { - spin_wait_while_eq(table[seg_index], segment_type(nullptr)); - } - } - return nullptr; - } - - // Returns the number of elements in the segment to be destroy - size_type number_of_elements_in_segment( segment_index_type seg_index ) { - size_type curr_vector_size = this->my_size.load(std::memory_order_relaxed); - size_type curr_segment_base = this->segment_base(seg_index); - - if (seg_index == 0) { - return std::min(curr_vector_size, this->segment_size(seg_index)); - } else { - // Perhaps the segment is allocated, but there are no elements in it. - if (curr_vector_size < curr_segment_base) { - return 0; - } - return curr_segment_base * 2 > curr_vector_size ? curr_vector_size - curr_segment_base : curr_segment_base; - } - } - - void deallocate_segment( segment_type address, segment_index_type seg_index ) { - segment_element_allocator_type segment_allocator(base_type::get_allocator()); - size_type first_block = this->my_first_block.load(std::memory_order_relaxed); - if (seg_index >= first_block) { - segment_element_allocator_traits::deallocate(segment_allocator, address, this->segment_size(seg_index)); - } - else if (seg_index == 0) { - size_type elements_to_deallocate = first_block > 0 ? 
this->segment_size(first_block) : this->segment_size(0); - segment_element_allocator_traits::deallocate(segment_allocator, address, elements_to_deallocate); - } - } - - // destroy_segment function is required by the segment_table base class - void destroy_segment( segment_type address, segment_index_type seg_index ) { - size_type elements_to_destroy = number_of_elements_in_segment(seg_index); - segment_element_allocator_type segment_allocator(base_type::get_allocator()); - - for (size_type i = 0; i < elements_to_destroy; ++i) { - segment_element_allocator_traits::destroy(segment_allocator, address + i); - } - - deallocate_segment(address, seg_index); - } - - // copy_segment function is required by the segment_table base class - void copy_segment( segment_index_type seg_index, segment_type from, segment_type to ) { - size_type i = 0; - try_call( [&] { - for (; i != number_of_elements_in_segment(seg_index); ++i) { - segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, from[i]); - } - } ).on_exception( [&] { - // Zero-initialize items left not constructed after the exception - zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); - - segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); - auto table = this->get_table(); - for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { - auto curr_segment = table[j].load(std::memory_order_relaxed); - if (curr_segment) { - zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); - } - } - this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); - }); - } - - // move_segment function is required by the segment_table base class - void move_segment( segment_index_type seg_index, segment_type from, segment_type to ) { - size_type i = 0; - try_call( [&] { - for (; i != number_of_elements_in_segment(seg_index); ++i) { - segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, std::move(from[i])); - } - } ).on_exception( [&] { - // Zero-initialize items left not constructed after the exception - zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); - - segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); - auto table = this->get_table(); - for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { - auto curr_segment = table[j].load(std::memory_order_relaxed); - if (curr_segment) { - zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); - } - } - this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); - }); - } - - static constexpr bool is_first_element_in_segment( size_type index ) { - // An element is the first in a segment if its index is equal to a power of two - return is_power_of_two_at_least(index, 2); - } - - const_reference internal_subscript( size_type index ) const { - return const_cast<self_type*>(this)->internal_subscript(index); - } - - reference internal_subscript( size_type index ) { - __TBB_ASSERT(index < this->my_size.load(std::memory_order_relaxed), "Invalid subscript index"); - return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); - } - - const_reference internal_subscript_with_exceptions( size_type index ) const { - return const_cast<self_type*>(this)->internal_subscript_with_exceptions(index); - } - - reference 
internal_subscript_with_exceptions( size_type index ) { - if (index >= this->my_size.load(std::memory_order_acquire)) { - tbb::detail::throw_exception(exception_id::out_of_range); - } - - segment_table_type table = this->my_segment_table.load(std::memory_order_acquire); - - size_type seg_index = this->segment_index_of(index); - if (base_type::number_of_segments(table) < seg_index) { - tbb::detail::throw_exception(exception_id::out_of_range); - } - - if (table[seg_index] <= this->segment_allocation_failure_tag) { - tbb::detail::throw_exception(exception_id::out_of_range); - } - - return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); - } - - static void zero_unconstructed_elements( pointer start, size_type count ) { - std::memset(static_cast<void *>(start), 0, count * sizeof(value_type)); - } - - template <typename... Args> - iterator internal_emplace_back( Args&&... args ) { - size_type old_size = this->my_size++; - this->assign_first_block_if_necessary(default_first_block_size); - auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(old_size); - - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard([&] { - zero_unconstructed_elements(element_address, /*count =*/1); - }); - - segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, std::forward<Args>(args)...); - value_guard.dismiss(); - return iterator(*this, old_size, element_address); - } - - template <typename... Args> - void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, const Args&... args ) { - static_assert(sizeof...(Args) < 2, "Too many parameters"); - for (size_type idx = start_idx; idx < end_idx; ++idx) { - auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard( [&] { - segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); - size_type segment_size = this->segment_size(last_allocated_segment); - end_idx = end_idx < segment_size ? end_idx : segment_size; - for (size_type i = idx; i < end_idx; ++i) { - zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); - } - }); - segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, args...); - value_guard.dismiss(); - } - } - - template <typename ForwardIterator> - void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, ForwardIterator first, ForwardIterator ) { - for (size_type idx = start_idx; idx < end_idx; ++idx) { - auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); - try_call( [&] { - segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, *first++); - } ).on_exception( [&] { - segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); - size_type segment_size = this->segment_size(last_allocated_segment); - end_idx = end_idx < segment_size ? end_idx : segment_size; - for (size_type i = idx; i < end_idx; ++i) { - zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); - } - }); - } - } - - template <typename... Args> - iterator internal_grow( size_type start_idx, size_type end_idx, const Args&... 
args ) { - this->assign_first_block_if_necessary(this->segment_index_of(end_idx - 1) + 1); - size_type seg_index = this->segment_index_of(end_idx - 1); - segment_table_type table = this->get_table(); - this->extend_table_if_necessary(table, start_idx, end_idx); - - if (seg_index > this->my_first_block.load(std::memory_order_relaxed)) { - // So that other threads be able to work with the last segment of grow_by, allocate it immediately. - // If the last segment is not less than the first block - if (table[seg_index].load(std::memory_order_relaxed) == nullptr) { - size_type first_element = this->segment_base(seg_index); - if (first_element >= start_idx && first_element < end_idx) { - segment_type segment = table[seg_index].load(std::memory_order_relaxed); - base_type::enable_segment(segment, table, seg_index, first_element); - } - } - } - - internal_loop_construct(table, start_idx, end_idx, args...); - - return iterator(*this, start_idx, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(start_idx)); - } - - - template <typename... Args> - iterator internal_grow_by_delta( size_type delta, const Args&... args ) { - if (delta == size_type(0)) { - return end(); - } - size_type start_idx = this->my_size.fetch_add(delta); - size_type end_idx = start_idx + delta; - - return internal_grow(start_idx, end_idx, args...); - } - - template <typename... Args> - iterator internal_grow_to_at_least( size_type new_size, const Args&... args ) { - size_type old_size = this->my_size.load(std::memory_order_relaxed); - if (new_size == size_type(0)) return iterator(*this, 0); - while (old_size < new_size && !this->my_size.compare_exchange_weak(old_size, new_size)) - {} - - int delta = static_cast<int>(new_size) - static_cast<int>(old_size); - if (delta > 0) { - return internal_grow(old_size, new_size, args...); - } - - size_type end_segment = this->segment_index_of(new_size - 1); - - // Check/wait for segments allocation completes - if (end_segment >= this->pointers_per_embedded_table && - this->get_table() == this->my_embedded_table) - { - spin_wait_while_eq(this->my_segment_table, this->my_embedded_table); - } - - for (segment_index_type seg_idx = 0; seg_idx <= end_segment; ++seg_idx) { - if (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { - atomic_backoff backoff(true); - while (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { - backoff.pause(); - } - } - } - - #if TBB_USE_DEBUG - size_type cap = capacity(); - __TBB_ASSERT( cap >= new_size, NULL); - #endif - return iterator(*this, size()); - } - - template <typename... Args> - void internal_resize( size_type n, const Args&... 
args ) { - if (n == 0) { - clear(); - return; - } - - size_type old_size = this->my_size.load(std::memory_order_acquire); - if (n > old_size) { - reserve(n); - grow_to_at_least(n, args...); - } else { - if (old_size == n) { - return; - } - size_type last_segment = this->segment_index_of(old_size - 1); - // Delete segments - for (size_type seg_idx = this->segment_index_of(n - 1) + 1; seg_idx <= last_segment; ++seg_idx) { - this->delete_segment(seg_idx); - } - - // If n > segment_size(n) => we need to destroy all of the items in the first segment - // Otherwise, we need to destroy only items with the index < n - size_type n_segment = this->segment_index_of(n - 1); - size_type last_index_to_destroy = std::min(this->segment_base(n_segment) + this->segment_size(n_segment), old_size); - // Destroy elements in curr segment - for (size_type idx = n; idx < last_index_to_destroy; ++idx) { - segment_table_allocator_traits::destroy(base_type::get_allocator(), &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(idx)); - } - this->my_size.store(n, std::memory_order_release); - } - } - - void destroy_elements() { - allocator_type alloc(base_type::get_allocator()); - for (size_type i = 0; i < this->my_size.load(std::memory_order_relaxed); ++i) { - allocator_traits_type::destroy(alloc, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(i)); - } - this->my_size.store(0, std::memory_order_relaxed); - } - - static bool incompact_predicate( size_type size ) { - // memory page size - const size_type page_size = 4096; - return size < page_size || ((size - 1) % page_size < page_size / 2 && size < page_size * 128); - } - - void internal_compact() { - const size_type curr_size = this->my_size.load(std::memory_order_relaxed); - segment_table_type table = this->get_table(); - const segment_index_type k_end = this->find_last_allocated_segment(table); // allocated segments - const segment_index_type k_stop = curr_size ? this->segment_index_of(curr_size - 1) + 1 : 0; // number of segments to store existing items: 0=>0; 1,2=>1; 3,4=>2; [5-8]=>3;.. 
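The k_stop comment above encodes the power-of-two segment layout: element indices 0-1 live in segment 0, 2-3 in segment 1, 4-7 in segment 2, and so on, so n existing items occupy segment_index_of(n - 1) + 1 segments. A standalone sketch of that arithmetic, using hypothetical helper names rather than the private segment_table members:

    // Hedged re-derivation of the table in the comment: 0 -> 0 segments, 1-2 -> 1, 3-4 -> 2, 5-8 -> 3, ...
    #include <cassert>
    #include <cstddef>

    // Hypothetical helpers for illustration only.
    static std::size_t segment_index_of(std::size_t element_index) {
        std::size_t k = 0;
        while ((std::size_t(2) << k) <= element_index) ++k;  // segment k >= 1 covers [2^k, 2^(k+1))
        return k;
    }

    static std::size_t segments_for(std::size_t n) {
        return n ? segment_index_of(n - 1) + 1 : 0;
    }

    int main() {
        assert(segments_for(0) == 0);
        assert(segments_for(1) == 1 && segments_for(2) == 1);
        assert(segments_for(3) == 2 && segments_for(4) == 2);
        assert(segments_for(5) == 3 && segments_for(8) == 3);
        return 0;
    }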
- const segment_index_type first_block = this->my_first_block; // number of merged segments, getting values from atomics - - segment_index_type k = first_block; - if (k_stop < first_block) { - k = k_stop; - } - else { - while (k < k_stop && incompact_predicate(this->segment_size(k) * sizeof(value_type))) k++; - } - - if (k_stop == k_end && k == first_block) { - return; - } - - // First segment optimization - if (k != first_block && k) { - size_type max_block = std::max(first_block, k); - - auto buffer_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), max_block); - - for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { - segment_table_allocator_traits::construct(base_type::get_allocator(), &buffer_table[seg_idx], - table[seg_idx].load(std::memory_order_relaxed)); - table[seg_idx].store(nullptr, std::memory_order_relaxed); - } - - this->my_first_block.store(k, std::memory_order_relaxed); - size_type index = 0; - try_call( [&] { - for (; index < std::min(this->segment_size(max_block), curr_size); ++index) { - auto element_address = &static_cast<base_type*>(this)->operator[](index); - segment_index_type seg_idx = this->segment_index_of(index); - segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, - std::move_if_noexcept(buffer_table[seg_idx].load(std::memory_order_relaxed)[index])); - } - } ).on_exception( [&] { - segment_element_allocator_type allocator(base_type::get_allocator()); - for (size_type i = 0; i < index; ++i) { - auto element_adress = &this->operator[](i); - segment_element_allocator_traits::destroy(allocator, element_adress); - } - segment_element_allocator_traits::deallocate(allocator, - table[0].load(std::memory_order_relaxed), this->segment_size(max_block)); - - for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { - table[seg_idx].store(buffer_table[seg_idx].load(std::memory_order_relaxed), - std::memory_order_relaxed); - buffer_table[seg_idx].store(nullptr, std::memory_order_relaxed); - } - segment_table_allocator_traits::deallocate(base_type::get_allocator(), - buffer_table, max_block); - this->my_first_block.store(first_block, std::memory_order_relaxed); - }); - - // Need to correct deallocate old segments - // Method destroy_segment respect active first_block, therefore, - // in order for the segment deletion to work correctly, set the first_block size that was earlier, - // destroy the unnecessary segments. 
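That ordering concern only matters inside the compaction itself; from the outside, the visible effect of shrink_to_fit() is that excess segments left over from reserve() are released. A small single-threaded sketch (shrink_to_fit, like reserve, is not meant to run concurrently with other operations; exact capacity values depend on the segment layout, so they are printed rather than asserted):

    // Single-threaded sketch of reserve() followed by shrink_to_fit().
    #include <oneapi/tbb/concurrent_vector.h>
    #include <iostream>

    int main() {
        tbb::concurrent_vector<int> v;
        v.reserve(1000);                          // may leave extra segments allocated
        for (int i = 0; i < 10; ++i) v.push_back(i);
        std::cout << "capacity before shrink: " << v.capacity() << '\n';
        v.shrink_to_fit();                        // releases segments not needed for the 10 stored items
        std::cout << "capacity after shrink:  " << v.capacity() << '\n';
        return 0;
    }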
- this->my_first_block.store(first_block, std::memory_order_relaxed); - for (size_type seg_idx = max_block; seg_idx > 0 ; --seg_idx) { - auto curr_segment = buffer_table[seg_idx - 1].load(std::memory_order_relaxed); - if (curr_segment != nullptr) { - destroy_segment(buffer_table[seg_idx - 1].load(std::memory_order_relaxed) + this->segment_base(seg_idx - 1), - seg_idx - 1); - } - } - - this->my_first_block.store(k, std::memory_order_relaxed); - - for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { - segment_table_allocator_traits::destroy(base_type::get_allocator(), &buffer_table[seg_idx]); - } - - segment_table_allocator_traits::deallocate(base_type::get_allocator(), buffer_table, max_block); - } - // free unnecessary segments allocated by reserve() call - if (k_stop < k_end) { - for (size_type seg_idx = k_end; seg_idx != k_stop; --seg_idx) { - if (table[seg_idx - 1].load(std::memory_order_relaxed) != nullptr) { - this->delete_segment(seg_idx - 1); - } - } - if (!k) this->my_first_block.store(0, std::memory_order_relaxed);; - } - } - - // Lever for adjusting the size of first_block at the very first insertion. - // TODO: consider >1 value, check performance - static constexpr size_type default_first_block_size = 1; - - template <typename Vector, typename Value> - friend class vector_iterator; -}; // class concurrent_vector - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -// Deduction guide for the constructor from two iterators -template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, - typename = std::enable_if_t<is_input_iterator_v<It>>, - typename = std::enable_if_t<is_allocator_v<Alloc>>> -concurrent_vector( It, It, Alloc = Alloc() ) --> concurrent_vector<iterator_value_t<It>, Alloc>; -#endif - -template <typename T, typename Allocator> -void swap(concurrent_vector<T, Allocator> &lhs, - concurrent_vector<T, Allocator> &rhs) -{ - lhs.swap(rhs); -} - -template <typename T, typename Allocator> -bool operator==(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); -} - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template <typename T, typename Allocator> -bool operator!=(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return !(lhs == rhs); -} -#endif // !__TBB_CPP20_COMPARISONS_PRESENT - -#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT -template <typename T, typename Allocator> -tbb::detail::synthesized_three_way_result<typename concurrent_vector<T, Allocator>::value_type> -operator<=>(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), - rhs.begin(), rhs.end(), - tbb::detail::synthesized_three_way_comparator{}); -} - -#else - -template <typename T, typename Allocator> -bool operator<(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); -} - -template <typename T, typename Allocator> -bool operator<=(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return !(rhs < lhs); -} - -template <typename T, typename Allocator> -bool operator>(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return rhs < lhs; -} - -template <typename T, typename 
Allocator> -bool operator>=(const concurrent_vector<T, Allocator> &lhs, - const concurrent_vector<T, Allocator> &rhs) -{ - return !(lhs < rhs); -} -#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::concurrent_vector; -} // namespace v1 - -} // namespace tbb - -#endif // __TBB_concurrent_vector_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_vector_H +#define __TBB_concurrent_vector_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_assert.h" +#include "detail/_allocator_traits.h" +#include "detail/_segment_table.h" +#include "detail/_containers_helpers.h" +#include "blocked_range.h" +#include "cache_aligned_allocator.h" + +#include <algorithm> +#include <utility> // std::move_if_noexcept +#include <algorithm> +#if __TBB_CPP20_COMPARISONS_PRESENT +#include <compare> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Vector, typename Value> +class vector_iterator { + using vector_type = Vector; + +public: + using value_type = Value; + using size_type = typename vector_type::size_type; + using difference_type = typename vector_type::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::random_access_iterator_tag; + + template <typename Vec, typename Val> + friend vector_iterator<Vec, Val> operator+( typename vector_iterator<Vec, Val>::difference_type, const vector_iterator<Vec, Val>& ); + + template <typename Vec, typename Val1, typename Val2> + friend typename vector_iterator<Vec, Val1>::difference_type operator-( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val1, typename Val2> + friend bool operator==( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val1, typename Val2> + friend bool operator<( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val> + friend class vector_iterator; + + template <typename T, typename Allocator> + friend class concurrent_vector; + +private: + vector_iterator( const vector_type& vector, size_type index, value_type* item = nullptr ) + : my_vector(const_cast<vector_type*>(&vector)), my_index(index), my_item(item) + {} + +public: + vector_iterator() : my_vector(nullptr), my_index(~size_type(0)), my_item(nullptr) + {} + + vector_iterator( const vector_iterator<vector_type, typename vector_type::value_type>& other ) + : my_vector(other.my_vector), my_index(other.my_index), my_item(other.my_item) + {} + + vector_iterator& operator=( const vector_iterator<vector_type, typename vector_type::value_type>& other ) { + my_vector = other.my_vector; + my_index = other.my_index; + my_item = other.my_item; + return *this; + } + + vector_iterator operator+( difference_type offset ) const 
{
+        return vector_iterator(*my_vector, my_index + offset);
+    }
+
+    vector_iterator& operator+=( difference_type offset ) {
+        my_index += offset;
+        my_item = nullptr;
+        return *this;
+    }
+
+    vector_iterator operator-( difference_type offset ) const {
+        return vector_iterator(*my_vector, my_index - offset);
+    }
+
+    vector_iterator& operator-=( difference_type offset ) {
+        my_index -= offset;
+        my_item = nullptr;
+        return *this;
+    }
+
+    reference operator*() const {
+        value_type *item = my_item;
+        if (item == nullptr) {
+            item = &my_vector->internal_subscript(my_index);
+        } else {
+            __TBB_ASSERT(item == &my_vector->internal_subscript(my_index), "corrupt cache");
+        }
+        return *item;
+    }
+
+    pointer operator->() const { return &(operator*()); }
+
+    reference operator[]( difference_type k ) const {
+        return my_vector->internal_subscript(my_index + k);
+    }
+
+    vector_iterator& operator++() {
+        ++my_index;
+        if (my_item != nullptr) {
+            if (vector_type::is_first_element_in_segment(my_index)) {
+                // If the iterator crosses a segment boundary, the cached pointer becomes invalid
+                // because the next segment may live in a different memory location
+                my_item = nullptr;
+            } else {
+                ++my_item;
+            }
+        }
+        return *this;
+    }
+
+    vector_iterator operator++(int) {
+        vector_iterator result = *this;
+        ++(*this);
+        return result;
+    }
+
+    vector_iterator& operator--() {
+        __TBB_ASSERT(my_index > 0, "operator--() applied to iterator already at beginning of concurrent_vector");
+        --my_index;
+        if (my_item != nullptr) {
+            if (vector_type::is_first_element_in_segment(my_index)) {
+                // If the iterator crosses a segment boundary, the cached pointer becomes invalid
+                // because the previous segment may live in a different memory location
+                my_item = nullptr;
+            } else {
+                --my_item;
+            }
+        }
+        return *this;
+    }
+
+    vector_iterator operator--(int) {
+        vector_iterator result = *this;
+        --(*this);
+        return result;
+    }
+
+private:
+    // concurrent_vector over which we are iterating.
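Because the cached element pointer is only ever advanced within one segment, and growth never relocates already-published segments, pointers, references and iterators obtained before a grow_by or push_back stay valid afterwards (clear() and shrink_to_fit(), by contrast, may move or destroy elements). A short standalone sketch of that guarantee, with arbitrary sizes:

    // Growth allocates new segments; it does not move existing elements.
    #include <oneapi/tbb/concurrent_vector.h>
    #include <cassert>

    int main() {
        tbb::concurrent_vector<int> v;
        v.push_back(42);
        int* p = &v[0];             // element lives in the first segment
        auto it = v.begin();
        v.grow_by(10000);           // further segments are allocated elsewhere
        assert(p == &v[0]);         // the old pointer still refers to the same element
        assert(*p == 42 && *it == 42);
        return 0;
    }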
+ vector_type* my_vector; + + // Index into the vector + size_type my_index; + + // Caches my_vector *it; + // If my_item == nullptr cached value is not available use internal_subscript(my_index) + mutable value_type* my_item; +}; // class vector_iterator + +template <typename Vector, typename T> +vector_iterator<Vector, T> operator+( typename vector_iterator<Vector, T>::difference_type offset, + const vector_iterator<Vector, T>& v ) +{ + return vector_iterator<Vector, T>(*v.my_vector, v.my_index + offset); +} + +template <typename Vector, typename T, typename U> +typename vector_iterator<Vector, T>::difference_type operator-( const vector_iterator<Vector, T>& i, + const vector_iterator<Vector, U>& j ) +{ + using difference_type = typename vector_iterator<Vector, T>::difference_type; + return static_cast<difference_type>(i.my_index) - static_cast<difference_type>(j.my_index); +} + +template <typename Vector, typename T, typename U> +bool operator==( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return i.my_vector == j.my_vector && i.my_index == j.my_index; +} + +template <typename Vector, typename T, typename U> +bool operator!=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(i == j); +} + +template <typename Vector, typename T, typename U> +bool operator<( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return i.my_index < j.my_index; +} + +template <typename Vector, typename T, typename U> +bool operator>( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return j < i; +} + +template <typename Vector, typename T, typename U> +bool operator>=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(i < j); +} + +template <typename Vector, typename T, typename U> +bool operator<=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(j < i); +} + +static constexpr std::size_t embedded_table_num_segments = 3; + +template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_vector + : private segment_table<T, Allocator, concurrent_vector<T, Allocator>, embedded_table_num_segments> +{ + using self_type = concurrent_vector<T, Allocator>; + using base_type = segment_table<T, Allocator, self_type, embedded_table_num_segments>; + + friend class segment_table<T, Allocator, self_type, embedded_table_num_segments>; + + template <typename Iterator> + class generic_range_type : public tbb::blocked_range<Iterator> { + using base_type = tbb::blocked_range<Iterator>; + public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + using iterator = Iterator; + using difference_type = std::ptrdiff_t; + + using base_type::base_type; + + template<typename U> + generic_range_type( const generic_range_type<U>& r) : blocked_range<Iterator>(r.begin(), r.end(), r.grainsize()) {} + generic_range_type( generic_range_type& r, split ) : blocked_range<Iterator>(r, split()) {} + }; // class generic_range_type + + static_assert(std::is_same<T, typename Allocator::value_type>::value, + "value_type of the container must be the same as its allocator's"); + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + // Segment table for concurrent_vector can be extended + static constexpr bool allow_table_extending = true; + static constexpr bool is_noexcept_assignment = allocator_traits_type::propagate_on_container_move_assignment::value || + 
allocator_traits_type::is_always_equal::value; + static constexpr bool is_noexcept_swap = allocator_traits_type::propagate_on_container_swap::value || + allocator_traits_type::is_always_equal::value; + +public: + using value_type = T; + using allocator_type = Allocator; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using const_reference = const value_type&; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = vector_iterator<concurrent_vector, value_type>; + using const_iterator = vector_iterator<concurrent_vector, const value_type>; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + + using range_type = generic_range_type<iterator>; + using const_range_type = generic_range_type<const_iterator>; + + concurrent_vector() : concurrent_vector(allocator_type()) {} + + explicit concurrent_vector( const allocator_type& alloc ) noexcept + : base_type(alloc) + {} + + explicit concurrent_vector( size_type count, const value_type& value, + const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(count, value); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + explicit concurrent_vector( size_type count, const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(count); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + template <typename InputIterator> + concurrent_vector( InputIterator first, InputIterator last, const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(first, last); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + concurrent_vector( const concurrent_vector& other ) + : base_type(segment_table_allocator_traits::select_on_container_copy_construction(other.get_allocator())) + { + try_call( [&] { + grow_by(other.begin(), other.end()); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + concurrent_vector( const concurrent_vector& other, const allocator_type& alloc ) + : base_type(other, alloc) {} + + concurrent_vector(concurrent_vector&& other) noexcept + : base_type(std::move(other)) + {} + + concurrent_vector( concurrent_vector&& other, const allocator_type& alloc ) + : base_type(std::move(other), alloc) + {} + + concurrent_vector( std::initializer_list<value_type> init, + const allocator_type& alloc = allocator_type() ) + : concurrent_vector(init.begin(), init.end(), alloc) + {} + + ~concurrent_vector() {} + + // Assignment + concurrent_vector& operator=( const concurrent_vector& other ) { + base_type::operator=(other); + return *this; + } + + concurrent_vector& operator=( concurrent_vector&& other ) noexcept(is_noexcept_assignment) { + base_type::operator=(std::move(other)); + return *this; + } + + concurrent_vector& operator=( std::initializer_list<value_type> init ) { + assign(init); + return *this; + } + + void assign( size_type count, const value_type& value ) { + destroy_elements(); + grow_by(count, value); + } + + template <typename InputIterator> + typename std::enable_if<is_input_iterator<InputIterator>::value, void>::type + assign( InputIterator first, InputIterator last ) { + destroy_elements(); + grow_by(first, last); + } + + void assign( std::initializer_list<value_type> init ) { + destroy_elements(); + assign(init.begin(), 
init.end()); + } + + // Concurrent growth + iterator grow_by( size_type delta ) { + return internal_grow_by_delta(delta); + } + + iterator grow_by( size_type delta, const value_type& value ) { + return internal_grow_by_delta(delta, value); + } + + template <typename ForwardIterator> + typename std::enable_if<is_input_iterator<ForwardIterator>::value, iterator>::type + grow_by( ForwardIterator first, ForwardIterator last ) { + auto delta = std::distance(first, last); + return internal_grow_by_delta(delta, first, last); + } + + iterator grow_by( std::initializer_list<value_type> init ) { + return grow_by(init.begin(), init.end()); + } + + iterator grow_to_at_least( size_type n ) { + return internal_grow_to_at_least(n); + } + iterator grow_to_at_least( size_type n, const value_type& value ) { + return internal_grow_to_at_least(n, value); + } + + iterator push_back( const value_type& item ) { + return internal_emplace_back(item); + } + + iterator push_back( value_type&& item ) { + return internal_emplace_back(std::move(item)); + } + + template <typename... Args> + iterator emplace_back( Args&&... args ) { + return internal_emplace_back(std::forward<Args>(args)...); + } + + // Items access + reference operator[]( size_type index ) { + return internal_subscript(index); + } + const_reference operator[]( size_type index ) const { + return internal_subscript(index); + } + + reference at( size_type index ) { + return internal_subscript_with_exceptions(index); + } + const_reference at( size_type index ) const { + return internal_subscript_with_exceptions(index); + } + + // Get range for iterating with parallel algorithms + range_type range( size_t grainsize = 1 ) { + return range_type(begin(), end(), grainsize); + } + + // Get const range for iterating with parallel algorithms + const_range_type range( size_t grainsize = 1 ) const { + return const_range_type(begin(), end(), grainsize); + } + + reference front() { + return internal_subscript(0); + } + + const_reference front() const { + return internal_subscript(0); + } + + reference back() { + return internal_subscript(size() - 1); + } + + const_reference back() const { + return internal_subscript(size() - 1); + } + + // Iterators + iterator begin() { return iterator(*this, 0); } + const_iterator begin() const { return const_iterator(*this, 0); } + const_iterator cbegin() const { return const_iterator(*this, 0); } + + iterator end() { return iterator(*this, size()); } + const_iterator end() const { return const_iterator(*this, size()); } + const_iterator cend() const { return const_iterator(*this, size()); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + allocator_type get_allocator() const { + return base_type::get_allocator(); + } + + // Storage + bool empty() const noexcept { + return 0 == size(); + } + + size_type size() const noexcept { + return std::min(this->my_size.load(std::memory_order_acquire), capacity()); + } + + size_type max_size() const noexcept { + return allocator_traits_type::max_size(base_type::get_allocator()); + } + + size_type capacity() const noexcept { + return base_type::capacity(); + } + + void reserve( 
size_type n ) { + if (n == 0) return; + + if (n > max_size()) { + tbb::detail::throw_exception(exception_id::reservation_length_error); + } + + this->assign_first_block_if_necessary(this->segment_index_of(n - 1) + 1); + base_type::reserve(n); + } + + void resize( size_type n ) { + internal_resize(n); + } + + void resize( size_type n, const value_type& val ) { + internal_resize(n, val); + } + + void shrink_to_fit() { + internal_compact(); + } + + void swap(concurrent_vector& other) noexcept(is_noexcept_swap) { + base_type::swap(other); + } + + void clear() { + destroy_elements(); + } + +private: + using segment_type = typename base_type::segment_type; + using segment_table_type = typename base_type::segment_table_type; + using segment_table_allocator_traits = typename base_type::segment_table_allocator_traits; + using segment_index_type = typename base_type::segment_index_type; + + using segment_element_type = typename base_type::value_type; + using segment_element_allocator_type = typename allocator_traits_type::template rebind_alloc<segment_element_type>; + using segment_element_allocator_traits = tbb::detail::allocator_traits<segment_element_allocator_type>; + + segment_table_type allocate_long_table( const typename base_type::atomic_segment* embedded_table, size_type start_index ) { + __TBB_ASSERT(start_index <= this->embedded_table_size, "Start index out of embedded table"); + + // If other threads are trying to set pointers in the short segment, wait for them to finish their + // assignments before we copy the short segment to the long segment. Note: grow_to_at_least depends on it + for (segment_index_type i = 0; this->segment_base(i) < start_index; ++i) { + spin_wait_while_eq(embedded_table[i], segment_type(nullptr)); + } + + // It is possible that the table was extend by a thread allocating first_block, need to check this. 
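allocate_long_table therefore has a double-check shape: wait for in-flight writers to the embedded slots, re-check that the published table is still the embedded one, and only then build the replacement. A stripped-down, generic sketch of that shape with plain std::atomic (hypothetical names, not the real segment_table types):

    // Generic double-check sketch: only build a replacement table if the
    // currently published table is still the embedded one.
    #include <atomic>
    #include <cstddef>

    constexpr std::size_t embedded_size = 3;
    constexpr std::size_t long_size = 8;

    std::atomic<void*> embedded_table[embedded_size];            // zero-initialized static storage
    std::atomic<std::atomic<void*>*> published{embedded_table};

    std::atomic<void*>* try_build_long_table() {
        if (published.load(std::memory_order_acquire) != embedded_table) {
            return nullptr;                                       // another thread already extended the table
        }
        auto* long_table = new std::atomic<void*>[long_size];
        for (std::size_t i = 0; i < embedded_size; ++i)
            long_table[i].store(embedded_table[i].load(std::memory_order_relaxed),
                                std::memory_order_relaxed);
        for (std::size_t i = embedded_size; i < long_size; ++i)
            long_table[i].store(nullptr, std::memory_order_relaxed);
        return long_table;                                        // the caller still has to publish it
    }

    int main() {
        delete[] try_build_long_table();
        return 0;
    }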
+ if (this->get_table() != embedded_table) { + return nullptr; + } + + // Allocate long segment table and fill with null pointers + segment_table_type new_segment_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), this->pointers_per_long_table); + // Copy segment pointers from the embedded table + for (size_type segment_index = 0; segment_index < this->pointers_per_embedded_table; ++segment_index) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], + embedded_table[segment_index].load(std::memory_order_relaxed)); + } + for (size_type segment_index = this->pointers_per_embedded_table; segment_index < this->pointers_per_long_table; ++segment_index) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], nullptr); + } + + return new_segment_table; + } + + // create_segment function is required by the segment_table base class + segment_type create_segment( segment_table_type table, segment_index_type seg_index, size_type index ) { + size_type first_block = this->my_first_block.load(std::memory_order_relaxed); + // First block allocation + if (seg_index < first_block) { + // If 0 segment is already allocated, then it remains to wait until the segments are filled to requested + if (table[0].load(std::memory_order_acquire) != nullptr) { + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + return nullptr; + } + + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + segment_type new_segment = nullptr; + size_type first_block_size = this->segment_size(first_block); + try_call( [&] { + new_segment = segment_element_allocator_traits::allocate(segment_allocator, first_block_size); + } ).on_exception( [&] { + segment_type disabled_segment = nullptr; + if (table[0].compare_exchange_strong(disabled_segment, this->segment_allocation_failure_tag)) { + size_type end_segment = table == this->my_embedded_table ? this->pointers_per_embedded_table : first_block; + for (size_type i = 1; i < end_segment; ++i) { + table[i].store(this->segment_allocation_failure_tag, std::memory_order_release); + } + } + }); + + segment_type disabled_segment = nullptr; + if (table[0].compare_exchange_strong(disabled_segment, new_segment)) { + this->extend_table_if_necessary(table, 0, first_block_size); + for (size_type i = 1; i < first_block; ++i) { + table[i].store(new_segment, std::memory_order_release); + } + + // Other threads can wait on a snapshot of an embedded table, need to fill it. 
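The surrounding block is a classic allocate-then-publish race: every contender allocates a candidate first block, exactly one compare_exchange_strong on table[0] succeeds, the losers release their allocation, and the exception path publishes a sentinel tag so waiters do not spin on a pointer that will never arrive. A generic sketch of the same shape, reduced to a single std::atomic slot with hypothetical names:

    // Generic allocate-then-publish sketch mirroring the CAS race above.
    #include <atomic>
    #include <cstddef>
    #include <thread>
    #include <vector>

    std::atomic<int*> slot{nullptr};

    int* get_or_create(std::size_t n) {
        int* mine = new int[n]();                       // every contender allocates first
        int* expected = nullptr;
        if (slot.compare_exchange_strong(expected, mine)) {
            return mine;                                // this thread won and published its block
        }
        delete[] mine;                                  // lost the race: discard and use the winner's block
        return expected;                                // the failed CAS loaded the published pointer
    }

    int main() {
        std::vector<std::thread> threads;
        for (int i = 0; i < 4; ++i) {
            threads.emplace_back([] { int* block = get_or_create(16); (void)block; });
        }
        for (auto& t : threads) t.join();
        delete[] slot.load();
        return 0;
    }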
+ for (size_type i = 1; i < first_block && i < this->pointers_per_embedded_table; ++i) { + this->my_embedded_table[i].store(new_segment, std::memory_order_release); + } + } else if (new_segment != this->segment_allocation_failure_tag) { + // Deallocate the memory + segment_element_allocator_traits::deallocate(segment_allocator, new_segment, first_block_size); + // 0 segment is already allocated, then it remains to wait until the segments are filled to requested + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + } + } else { + size_type offset = this->segment_base(seg_index); + if (index == offset) { + __TBB_ASSERT(table[seg_index].load(std::memory_order_relaxed) == nullptr, "Only this thread can enable this segment"); + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + segment_type new_segment = this->segment_allocation_failure_tag; + try_call( [&] { + new_segment = segment_element_allocator_traits::allocate(segment_allocator,this->segment_size(seg_index)); + // Shift base address to simplify access by index + new_segment -= this->segment_base(seg_index); + } ).on_completion( [&] { + table[seg_index].store(new_segment, std::memory_order_release); + }); + } else { + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + } + } + return nullptr; + } + + // Returns the number of elements in the segment to be destroy + size_type number_of_elements_in_segment( segment_index_type seg_index ) { + size_type curr_vector_size = this->my_size.load(std::memory_order_relaxed); + size_type curr_segment_base = this->segment_base(seg_index); + + if (seg_index == 0) { + return std::min(curr_vector_size, this->segment_size(seg_index)); + } else { + // Perhaps the segment is allocated, but there are no elements in it. + if (curr_vector_size < curr_segment_base) { + return 0; + } + return curr_segment_base * 2 > curr_vector_size ? curr_vector_size - curr_segment_base : curr_segment_base; + } + } + + void deallocate_segment( segment_type address, segment_index_type seg_index ) { + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + size_type first_block = this->my_first_block.load(std::memory_order_relaxed); + if (seg_index >= first_block) { + segment_element_allocator_traits::deallocate(segment_allocator, address, this->segment_size(seg_index)); + } + else if (seg_index == 0) { + size_type elements_to_deallocate = first_block > 0 ? 
this->segment_size(first_block) : this->segment_size(0); + segment_element_allocator_traits::deallocate(segment_allocator, address, elements_to_deallocate); + } + } + + // destroy_segment function is required by the segment_table base class + void destroy_segment( segment_type address, segment_index_type seg_index ) { + size_type elements_to_destroy = number_of_elements_in_segment(seg_index); + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + + for (size_type i = 0; i < elements_to_destroy; ++i) { + segment_element_allocator_traits::destroy(segment_allocator, address + i); + } + + deallocate_segment(address, seg_index); + } + + // copy_segment function is required by the segment_table base class + void copy_segment( segment_index_type seg_index, segment_type from, segment_type to ) { + size_type i = 0; + try_call( [&] { + for (; i != number_of_elements_in_segment(seg_index); ++i) { + segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, from[i]); + } + } ).on_exception( [&] { + // Zero-initialize items left not constructed after the exception + zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); + + segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); + auto table = this->get_table(); + for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { + auto curr_segment = table[j].load(std::memory_order_relaxed); + if (curr_segment) { + zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); + } + } + this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); + }); + } + + // move_segment function is required by the segment_table base class + void move_segment( segment_index_type seg_index, segment_type from, segment_type to ) { + size_type i = 0; + try_call( [&] { + for (; i != number_of_elements_in_segment(seg_index); ++i) { + segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, std::move(from[i])); + } + } ).on_exception( [&] { + // Zero-initialize items left not constructed after the exception + zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); + + segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); + auto table = this->get_table(); + for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { + auto curr_segment = table[j].load(std::memory_order_relaxed); + if (curr_segment) { + zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); + } + } + this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); + }); + } + + static constexpr bool is_first_element_in_segment( size_type index ) { + // An element is the first in a segment if its index is equal to a power of two + return is_power_of_two_at_least(index, 2); + } + + const_reference internal_subscript( size_type index ) const { + return const_cast<self_type*>(this)->internal_subscript(index); + } + + reference internal_subscript( size_type index ) { + __TBB_ASSERT(index < this->my_size.load(std::memory_order_relaxed), "Invalid subscript index"); + return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); + } + + const_reference internal_subscript_with_exceptions( size_type index ) const { + return const_cast<self_type*>(this)->internal_subscript_with_exceptions(index); + } + + reference 
internal_subscript_with_exceptions( size_type index ) { + if (index >= this->my_size.load(std::memory_order_acquire)) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + segment_table_type table = this->my_segment_table.load(std::memory_order_acquire); + + size_type seg_index = this->segment_index_of(index); + if (base_type::number_of_segments(table) < seg_index) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + if (table[seg_index] <= this->segment_allocation_failure_tag) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); + } + + static void zero_unconstructed_elements( pointer start, size_type count ) { + std::memset(static_cast<void *>(start), 0, count * sizeof(value_type)); + } + + template <typename... Args> + iterator internal_emplace_back( Args&&... args ) { + size_type old_size = this->my_size++; + this->assign_first_block_if_necessary(default_first_block_size); + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(old_size); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + zero_unconstructed_elements(element_address, /*count =*/1); + }); + + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, std::forward<Args>(args)...); + value_guard.dismiss(); + return iterator(*this, old_size, element_address); + } + + template <typename... Args> + void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, const Args&... args ) { + static_assert(sizeof...(Args) < 2, "Too many parameters"); + for (size_type idx = start_idx; idx < end_idx; ++idx) { + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard( [&] { + segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); + size_type segment_size = this->segment_size(last_allocated_segment); + end_idx = end_idx < segment_size ? end_idx : segment_size; + for (size_type i = idx; i < end_idx; ++i) { + zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); + } + }); + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, args...); + value_guard.dismiss(); + } + } + + template <typename ForwardIterator> + void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, ForwardIterator first, ForwardIterator ) { + for (size_type idx = start_idx; idx < end_idx; ++idx) { + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); + try_call( [&] { + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, *first++); + } ).on_exception( [&] { + segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); + size_type segment_size = this->segment_size(last_allocated_segment); + end_idx = end_idx < segment_size ? end_idx : segment_size; + for (size_type i = idx; i < end_idx; ++i) { + zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); + } + }); + } + } + + template <typename... Args> + iterator internal_grow( size_type start_idx, size_type end_idx, const Args&... 
args ) { + this->assign_first_block_if_necessary(this->segment_index_of(end_idx - 1) + 1); + size_type seg_index = this->segment_index_of(end_idx - 1); + segment_table_type table = this->get_table(); + this->extend_table_if_necessary(table, start_idx, end_idx); + + if (seg_index > this->my_first_block.load(std::memory_order_relaxed)) { + // So that other threads be able to work with the last segment of grow_by, allocate it immediately. + // If the last segment is not less than the first block + if (table[seg_index].load(std::memory_order_relaxed) == nullptr) { + size_type first_element = this->segment_base(seg_index); + if (first_element >= start_idx && first_element < end_idx) { + segment_type segment = table[seg_index].load(std::memory_order_relaxed); + base_type::enable_segment(segment, table, seg_index, first_element); + } + } + } + + internal_loop_construct(table, start_idx, end_idx, args...); + + return iterator(*this, start_idx, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(start_idx)); + } + + + template <typename... Args> + iterator internal_grow_by_delta( size_type delta, const Args&... args ) { + if (delta == size_type(0)) { + return end(); + } + size_type start_idx = this->my_size.fetch_add(delta); + size_type end_idx = start_idx + delta; + + return internal_grow(start_idx, end_idx, args...); + } + + template <typename... Args> + iterator internal_grow_to_at_least( size_type new_size, const Args&... args ) { + size_type old_size = this->my_size.load(std::memory_order_relaxed); + if (new_size == size_type(0)) return iterator(*this, 0); + while (old_size < new_size && !this->my_size.compare_exchange_weak(old_size, new_size)) + {} + + int delta = static_cast<int>(new_size) - static_cast<int>(old_size); + if (delta > 0) { + return internal_grow(old_size, new_size, args...); + } + + size_type end_segment = this->segment_index_of(new_size - 1); + + // Check/wait for segments allocation completes + if (end_segment >= this->pointers_per_embedded_table && + this->get_table() == this->my_embedded_table) + { + spin_wait_while_eq(this->my_segment_table, this->my_embedded_table); + } + + for (segment_index_type seg_idx = 0; seg_idx <= end_segment; ++seg_idx) { + if (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { + atomic_backoff backoff(true); + while (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { + backoff.pause(); + } + } + } + + #if TBB_USE_DEBUG + size_type cap = capacity(); + __TBB_ASSERT( cap >= new_size, NULL); + #endif + return iterator(*this, size()); + } + + template <typename... Args> + void internal_resize( size_type n, const Args&... 
args ) { + if (n == 0) { + clear(); + return; + } + + size_type old_size = this->my_size.load(std::memory_order_acquire); + if (n > old_size) { + reserve(n); + grow_to_at_least(n, args...); + } else { + if (old_size == n) { + return; + } + size_type last_segment = this->segment_index_of(old_size - 1); + // Delete segments + for (size_type seg_idx = this->segment_index_of(n - 1) + 1; seg_idx <= last_segment; ++seg_idx) { + this->delete_segment(seg_idx); + } + + // If n > segment_size(n) => we need to destroy all of the items in the first segment + // Otherwise, we need to destroy only items with the index < n + size_type n_segment = this->segment_index_of(n - 1); + size_type last_index_to_destroy = std::min(this->segment_base(n_segment) + this->segment_size(n_segment), old_size); + // Destroy elements in curr segment + for (size_type idx = n; idx < last_index_to_destroy; ++idx) { + segment_table_allocator_traits::destroy(base_type::get_allocator(), &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(idx)); + } + this->my_size.store(n, std::memory_order_release); + } + } + + void destroy_elements() { + allocator_type alloc(base_type::get_allocator()); + for (size_type i = 0; i < this->my_size.load(std::memory_order_relaxed); ++i) { + allocator_traits_type::destroy(alloc, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(i)); + } + this->my_size.store(0, std::memory_order_relaxed); + } + + static bool incompact_predicate( size_type size ) { + // memory page size + const size_type page_size = 4096; + return size < page_size || ((size - 1) % page_size < page_size / 2 && size < page_size * 128); + } + + void internal_compact() { + const size_type curr_size = this->my_size.load(std::memory_order_relaxed); + segment_table_type table = this->get_table(); + const segment_index_type k_end = this->find_last_allocated_segment(table); // allocated segments + const segment_index_type k_stop = curr_size ? this->segment_index_of(curr_size - 1) + 1 : 0; // number of segments to store existing items: 0=>0; 1,2=>1; 3,4=>2; [5-8]=>3;.. 
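incompact_predicate above reads as a heuristic: a segment is still worth folding into the merged first block while its byte size is under a page, or while its last page would be at most half-occupied and the whole segment stays under 128 pages. Copying the formula into a standalone program makes it easy to see which sizes keep the merge loop going:

    // Standalone copy of the incompact_predicate heuristic, for experimentation only.
    #include <cstddef>
    #include <iostream>

    bool incompact_predicate(std::size_t size_in_bytes) {          // same formula as in the header
        const std::size_t page_size = 4096;
        return size_in_bytes < page_size ||
               ((size_in_bytes - 1) % page_size < page_size / 2 && size_in_bytes < page_size * 128);
    }

    int main() {
        const std::size_t sizes[] = {512, 4096, 6144, 8192, 100000, 1u << 20};
        for (std::size_t s : sizes) {
            std::cout << s << " bytes -> "
                      << (incompact_predicate(s) ? "keep merging" : "stop") << '\n';
        }
        return 0;
    }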
+ const segment_index_type first_block = this->my_first_block; // number of merged segments, getting values from atomics + + segment_index_type k = first_block; + if (k_stop < first_block) { + k = k_stop; + } + else { + while (k < k_stop && incompact_predicate(this->segment_size(k) * sizeof(value_type))) k++; + } + + if (k_stop == k_end && k == first_block) { + return; + } + + // First segment optimization + if (k != first_block && k) { + size_type max_block = std::max(first_block, k); + + auto buffer_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), max_block); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &buffer_table[seg_idx], + table[seg_idx].load(std::memory_order_relaxed)); + table[seg_idx].store(nullptr, std::memory_order_relaxed); + } + + this->my_first_block.store(k, std::memory_order_relaxed); + size_type index = 0; + try_call( [&] { + for (; index < std::min(this->segment_size(max_block), curr_size); ++index) { + auto element_address = &static_cast<base_type*>(this)->operator[](index); + segment_index_type seg_idx = this->segment_index_of(index); + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, + std::move_if_noexcept(buffer_table[seg_idx].load(std::memory_order_relaxed)[index])); + } + } ).on_exception( [&] { + segment_element_allocator_type allocator(base_type::get_allocator()); + for (size_type i = 0; i < index; ++i) { + auto element_adress = &this->operator[](i); + segment_element_allocator_traits::destroy(allocator, element_adress); + } + segment_element_allocator_traits::deallocate(allocator, + table[0].load(std::memory_order_relaxed), this->segment_size(max_block)); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + table[seg_idx].store(buffer_table[seg_idx].load(std::memory_order_relaxed), + std::memory_order_relaxed); + buffer_table[seg_idx].store(nullptr, std::memory_order_relaxed); + } + segment_table_allocator_traits::deallocate(base_type::get_allocator(), + buffer_table, max_block); + this->my_first_block.store(first_block, std::memory_order_relaxed); + }); + + // Need to correct deallocate old segments + // Method destroy_segment respect active first_block, therefore, + // in order for the segment deletion to work correctly, set the first_block size that was earlier, + // destroy the unnecessary segments. 
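The compaction logic above leans on the power-of-two segment layout spelled out by the size comment just before it (1-2 elements need one segment, 3-4 two, 5-8 three, and so on). A rough standalone sketch of that arithmetic; the helper names mirror, but are not, the actual segment_table base-class API:

#include <cstddef>
#include <cassert>

// floor(log2(index | 1)): index 0 and 1 land in segment 0,
// segment k > 0 then holds the next 2^k indices.
static std::size_t segment_index_of(std::size_t index) {
    std::size_t k = 0;
    for (std::size_t v = index | 1; v > 1; v >>= 1) ++k;
    return k;
}
static std::size_t segment_base(std::size_t k) { return (std::size_t(1) << k) & ~std::size_t(1); }
static std::size_t segment_size(std::size_t k) { return k == 0 ? 2 : std::size_t(1) << k; }

int main() {
    assert(segment_index_of(0) == 0 && segment_index_of(1) == 0);
    assert(segment_index_of(2) == 1 && segment_index_of(4) == 2 && segment_index_of(8) == 3);
    assert(segment_base(2) == 4 && segment_size(2) == 4);
    return 0;
}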
+ this->my_first_block.store(first_block, std::memory_order_relaxed); + for (size_type seg_idx = max_block; seg_idx > 0 ; --seg_idx) { + auto curr_segment = buffer_table[seg_idx - 1].load(std::memory_order_relaxed); + if (curr_segment != nullptr) { + destroy_segment(buffer_table[seg_idx - 1].load(std::memory_order_relaxed) + this->segment_base(seg_idx - 1), + seg_idx - 1); + } + } + + this->my_first_block.store(k, std::memory_order_relaxed); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + segment_table_allocator_traits::destroy(base_type::get_allocator(), &buffer_table[seg_idx]); + } + + segment_table_allocator_traits::deallocate(base_type::get_allocator(), buffer_table, max_block); + } + // free unnecessary segments allocated by reserve() call + if (k_stop < k_end) { + for (size_type seg_idx = k_end; seg_idx != k_stop; --seg_idx) { + if (table[seg_idx - 1].load(std::memory_order_relaxed) != nullptr) { + this->delete_segment(seg_idx - 1); + } + } + if (!k) this->my_first_block.store(0, std::memory_order_relaxed);; + } + } + + // Lever for adjusting the size of first_block at the very first insertion. + // TODO: consider >1 value, check performance + static constexpr size_type default_first_block_size = 1; + + template <typename Vector, typename Value> + friend class vector_iterator; +}; // class concurrent_vector + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_vector( It, It, Alloc = Alloc() ) +-> concurrent_vector<iterator_value_t<It>, Alloc>; +#endif + +template <typename T, typename Allocator> +void swap(concurrent_vector<T, Allocator> &lhs, + concurrent_vector<T, Allocator> &rhs) +{ + lhs.swap(rhs); +} + +template <typename T, typename Allocator> +bool operator==(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename T, typename Allocator> +bool operator!=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(lhs == rhs); +} +#endif // !__TBB_CPP20_COMPARISONS_PRESENT + +#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT +template <typename T, typename Allocator> +tbb::detail::synthesized_three_way_result<typename concurrent_vector<T, Allocator>::value_type> +operator<=>(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), + rhs.begin(), rhs.end(), + tbb::detail::synthesized_three_way_comparator{}); +} + +#else + +template <typename T, typename Allocator> +bool operator<(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +template <typename T, typename Allocator> +bool operator<=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(rhs < lhs); +} + +template <typename T, typename Allocator> +bool operator>(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return rhs < lhs; +} + +template <typename T, typename 
Allocator> +bool operator>=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(lhs < rhs); +} +#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::concurrent_vector; +} // namespace v1 + +} // namespace tbb + +#endif // __TBB_concurrent_vector_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h index 40ba64e43d..83598bbd0d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h @@ -1,173 +1,173 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - - -#ifndef __TBB_detail__aggregator_H -#define __TBB_detail__aggregator_H - -#include "_assert.h" -#include "_utils.h" -#include <atomic> -#if !__TBBMALLOC_BUILD // TODO: check this macro with TBB Malloc -#include "../profiling.h" -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -// Base class for aggregated operation -template <typename Derived> -class aggregated_operation { -public: - // Zero value means "wait" status, all other values are "user" specified values and - // are defined into the scope of a class which uses "status" - std::atomic<uintptr_t> status; - - std::atomic<Derived*> next; - aggregated_operation() : status{}, next(nullptr) {} -}; // class aggregated_operation - -// Aggregator base class -/* An aggregator for collecting operations coming from multiple sources and executing - them serially on a single thread. OperationType must be derived from - aggregated_operation. The parameter HandlerType is a functor that will be passed the - list of operations and is expected to handle each operation appropriately, setting the - status of each operation to non-zero. */ -template <typename OperationType> -class aggregator_generic { -public: - aggregator_generic() : pending_operations(nullptr), handler_busy(false) {} - - // Execute an operation - /* Places an operation into the waitlist (pending_operations), and either handles the list, - or waits for the operation to complete, or returns. - The long_life_time parameter specifies the life time of the given operation object. - Operations with long_life_time == true may be accessed after execution. - A "short" life time operation (long_life_time == false) can be destroyed - during execution, and so any access to it after it was put into the waitlist, - including status check, is invalid. As a consequence, waiting for completion - of such operation causes undefined behavior. */ - template <typename HandlerType> - void execute( OperationType* op, HandlerType& handle_operations, bool long_life_time = true ) { - // op->status should be read before inserting the operation into the - // aggregator waitlist since it can become invalid after executing a - // handler (if the operation has 'short' life time.) 
- const uintptr_t status = op->status.load(std::memory_order_relaxed); - - // ITT note: &(op->status) tag is used to cover accesses to this op node. This - // thread has created the operation, and now releases it so that the handler - // thread may handle the associated operation w/o triggering a race condition; - // thus this tag will be acquired just before the operation is handled in the - // handle_operations functor. - call_itt_notify(releasing, &(op->status)); - // insert the operation in the queue. - OperationType* res = pending_operations.load(std::memory_order_relaxed); - do { - op->next.store(res, std::memory_order_relaxed); - } while (!pending_operations.compare_exchange_strong(res, op)); - if (!res) { // first in the list; handle the operations - // ITT note: &pending_operations tag covers access to the handler_busy flag, - // which this waiting handler thread will try to set before entering - // handle_operations. - call_itt_notify(acquired, &pending_operations); - start_handle_operations(handle_operations); - // The operation with 'short' life time can already be destroyed - if (long_life_time) - __TBB_ASSERT(op->status.load(std::memory_order_relaxed), NULL); - } - // Not first; wait for op to be ready - else if (!status) { // operation is blocking here. - __TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing"); - call_itt_notify(prepare, &(op->status)); - spin_wait_while_eq(op->status, uintptr_t(0)); - } - } - -private: - // Trigger the handling of operations when the handler is free - template <typename HandlerType> - void start_handle_operations( HandlerType& handle_operations ) { - OperationType* op_list; - - // ITT note: &handler_busy tag covers access to pending_operations as it is passed - // between active and waiting handlers. Below, the waiting handler waits until - // the active handler releases, and the waiting handler acquires &handler_busy as - // it becomes the active_handler. The release point is at the end of this - // function, when all operations in pending_operations have been handled by the - // owner of this aggregator. - call_itt_notify(prepare, &handler_busy); - // get the handler_busy: - // only one thread can possibly spin here at a time - spin_wait_until_eq(handler_busy, uintptr_t(0)); - call_itt_notify(acquired, &handler_busy); - // acquire fence not necessary here due to causality rule and surrounding atomics - handler_busy.store(1, std::memory_order_relaxed); - - // ITT note: &pending_operations tag covers access to the handler_busy flag - // itself. Capturing the state of the pending_operations signifies that - // handler_busy has been set and a new active handler will now process that list's - // operations. 
- call_itt_notify(releasing, &pending_operations); - // grab pending_operations - op_list = pending_operations.exchange(nullptr); - - // handle all the operations - handle_operations(op_list); - - // release the handler - handler_busy.store(0, std::memory_order_release); - } - - // An atomically updated list (aka mailbox) of pending operations - std::atomic<OperationType*> pending_operations; - // Controls threads access to handle_operations - std::atomic<uintptr_t> handler_busy; -}; // class aggregator_generic - -template <typename HandlerType, typename OperationType> -class aggregator : public aggregator_generic<OperationType> { - HandlerType handle_operations; -public: - aggregator() = default; - - void initialize_handler( HandlerType h ) { handle_operations = h; } - - void execute(OperationType* op) { - aggregator_generic<OperationType>::execute(op, handle_operations); - } -}; // class aggregator - -// the most-compatible friend declaration (vs, gcc, icc) is -// template<class U, class V> friend class aggregating_functor; -template <typename AggregatingClass, typename OperationList> -class aggregating_functor { - AggregatingClass* my_object; -public: - aggregating_functor() = default; - aggregating_functor( AggregatingClass* object ) : my_object(object) { - __TBB_ASSERT(my_object, nullptr); - } - - void operator()( OperationList* op_list ) { my_object->handle_operations(op_list); } -}; // class aggregating_functor - - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__aggregator_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + + +#ifndef __TBB_detail__aggregator_H +#define __TBB_detail__aggregator_H + +#include "_assert.h" +#include "_utils.h" +#include <atomic> +#if !__TBBMALLOC_BUILD // TODO: check this macro with TBB Malloc +#include "../profiling.h" +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +// Base class for aggregated operation +template <typename Derived> +class aggregated_operation { +public: + // Zero value means "wait" status, all other values are "user" specified values and + // are defined into the scope of a class which uses "status" + std::atomic<uintptr_t> status; + + std::atomic<Derived*> next; + aggregated_operation() : status{}, next(nullptr) {} +}; // class aggregated_operation + +// Aggregator base class +/* An aggregator for collecting operations coming from multiple sources and executing + them serially on a single thread. OperationType must be derived from + aggregated_operation. The parameter HandlerType is a functor that will be passed the + list of operations and is expected to handle each operation appropriately, setting the + status of each operation to non-zero. 
*/ +template <typename OperationType> +class aggregator_generic { +public: + aggregator_generic() : pending_operations(nullptr), handler_busy(false) {} + + // Execute an operation + /* Places an operation into the waitlist (pending_operations), and either handles the list, + or waits for the operation to complete, or returns. + The long_life_time parameter specifies the life time of the given operation object. + Operations with long_life_time == true may be accessed after execution. + A "short" life time operation (long_life_time == false) can be destroyed + during execution, and so any access to it after it was put into the waitlist, + including status check, is invalid. As a consequence, waiting for completion + of such operation causes undefined behavior. */ + template <typename HandlerType> + void execute( OperationType* op, HandlerType& handle_operations, bool long_life_time = true ) { + // op->status should be read before inserting the operation into the + // aggregator waitlist since it can become invalid after executing a + // handler (if the operation has 'short' life time.) + const uintptr_t status = op->status.load(std::memory_order_relaxed); + + // ITT note: &(op->status) tag is used to cover accesses to this op node. This + // thread has created the operation, and now releases it so that the handler + // thread may handle the associated operation w/o triggering a race condition; + // thus this tag will be acquired just before the operation is handled in the + // handle_operations functor. + call_itt_notify(releasing, &(op->status)); + // insert the operation in the queue. + OperationType* res = pending_operations.load(std::memory_order_relaxed); + do { + op->next.store(res, std::memory_order_relaxed); + } while (!pending_operations.compare_exchange_strong(res, op)); + if (!res) { // first in the list; handle the operations + // ITT note: &pending_operations tag covers access to the handler_busy flag, + // which this waiting handler thread will try to set before entering + // handle_operations. + call_itt_notify(acquired, &pending_operations); + start_handle_operations(handle_operations); + // The operation with 'short' life time can already be destroyed + if (long_life_time) + __TBB_ASSERT(op->status.load(std::memory_order_relaxed), NULL); + } + // Not first; wait for op to be ready + else if (!status) { // operation is blocking here. + __TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing"); + call_itt_notify(prepare, &(op->status)); + spin_wait_while_eq(op->status, uintptr_t(0)); + } + } + +private: + // Trigger the handling of operations when the handler is free + template <typename HandlerType> + void start_handle_operations( HandlerType& handle_operations ) { + OperationType* op_list; + + // ITT note: &handler_busy tag covers access to pending_operations as it is passed + // between active and waiting handlers. Below, the waiting handler waits until + // the active handler releases, and the waiting handler acquires &handler_busy as + // it becomes the active_handler. The release point is at the end of this + // function, when all operations in pending_operations have been handled by the + // owner of this aggregator. 
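A compact sketch of how the aggregated_operation / aggregator machinery in this header is meant to be used; the operation type and handler below are hypothetical, not TBB code. The handler walks the captured waitlist and must set each status to a non-zero value, which is what releases a thread spinning inside execute():

#include "oneapi/tbb/detail/_aggregator.h"
#include <atomic>

struct sum_operation : tbb::detail::d1::aggregated_operation<sum_operation> {
    int value = 0;
    long* accumulator = nullptr;
};

struct sum_handler {
    void operator()(sum_operation* list) const {
        while (list) {
            // Read 'next' before publishing completion: once status becomes
            // non-zero the waiting thread may destroy the operation object.
            sum_operation* next = list->next.load(std::memory_order_relaxed);
            *list->accumulator += list->value;
            list->status.store(1, std::memory_order_release);
            list = next;
        }
    }
};

void add_value(tbb::detail::d1::aggregator<sum_handler, sum_operation>& agg,
               long& total, int v) {
    sum_operation op;          // short-lived is fine: execute() waits for it
    op.value = v;
    op.accumulator = &total;
    agg.execute(&op);          // returns only after some thread has handled op
}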
+ call_itt_notify(prepare, &handler_busy); + // get the handler_busy: + // only one thread can possibly spin here at a time + spin_wait_until_eq(handler_busy, uintptr_t(0)); + call_itt_notify(acquired, &handler_busy); + // acquire fence not necessary here due to causality rule and surrounding atomics + handler_busy.store(1, std::memory_order_relaxed); + + // ITT note: &pending_operations tag covers access to the handler_busy flag + // itself. Capturing the state of the pending_operations signifies that + // handler_busy has been set and a new active handler will now process that list's + // operations. + call_itt_notify(releasing, &pending_operations); + // grab pending_operations + op_list = pending_operations.exchange(nullptr); + + // handle all the operations + handle_operations(op_list); + + // release the handler + handler_busy.store(0, std::memory_order_release); + } + + // An atomically updated list (aka mailbox) of pending operations + std::atomic<OperationType*> pending_operations; + // Controls threads access to handle_operations + std::atomic<uintptr_t> handler_busy; +}; // class aggregator_generic + +template <typename HandlerType, typename OperationType> +class aggregator : public aggregator_generic<OperationType> { + HandlerType handle_operations; +public: + aggregator() = default; + + void initialize_handler( HandlerType h ) { handle_operations = h; } + + void execute(OperationType* op) { + aggregator_generic<OperationType>::execute(op, handle_operations); + } +}; // class aggregator + +// the most-compatible friend declaration (vs, gcc, icc) is +// template<class U, class V> friend class aggregating_functor; +template <typename AggregatingClass, typename OperationList> +class aggregating_functor { + AggregatingClass* my_object; +public: + aggregating_functor() = default; + aggregating_functor( AggregatingClass* object ) : my_object(object) { + __TBB_ASSERT(my_object, nullptr); + } + + void operator()( OperationList* op_list ) { my_object->handle_operations(op_list); } +}; // class aggregating_functor + + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__aggregator_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h index 13857c47cc..6889983b74 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h @@ -1,46 +1,46 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -#ifndef __TBB_aligned_space_H -#define __TBB_aligned_space_H - -#include <cstddef> - -#include "_template_helpers.h" - -namespace tbb { -namespace detail { -inline namespace d0 { - -//! Block of space aligned sufficiently to construct an array T with N elements. -/** The elements are not constructed or destroyed by this class. 
- @ingroup memory_allocation */ -template<typename T, std::size_t N = 1> -class aligned_space { - alignas(alignof(T)) std::uint8_t aligned_array[N * sizeof(T)]; - -public: - //! Pointer to beginning of array - T* begin() const { return punned_cast<T*>(&aligned_array); } - - //! Pointer to one past last element in array. - T* end() const { return begin() + N; } -}; - -} // namespace d0 -} // namespace detail -} // namespace tbb - -#endif /* __TBB_aligned_space_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef __TBB_aligned_space_H +#define __TBB_aligned_space_H + +#include <cstddef> + +#include "_template_helpers.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Block of space aligned sufficiently to construct an array T with N elements. +/** The elements are not constructed or destroyed by this class. + @ingroup memory_allocation */ +template<typename T, std::size_t N = 1> +class aligned_space { + alignas(alignof(T)) std::uint8_t aligned_array[N * sizeof(T)]; + +public: + //! Pointer to beginning of array + T* begin() const { return punned_cast<T*>(&aligned_array); } + + //! Pointer to one past last element in array. + T* end() const { return begin() + N; } +}; + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_aligned_space_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h index 8c60e25e7e..c3485d1424 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h @@ -1,107 +1,107 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
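aligned_space is just raw, suitably aligned storage: it never constructs or destroys the T objects, so the caller does both explicitly. A small illustrative sketch (the widget type is hypothetical):

#include "oneapi/tbb/detail/_aligned_space.h"
#include <new>

struct widget {
    int id;
    explicit widget(int i) : id(i) {}
    ~widget() {}
};

void demo() {
    // Uninitialized, correctly aligned storage for 4 widgets.
    tbb::detail::d0::aligned_space<widget, 4> storage;
    int i = 0;
    for (widget* p = storage.begin(); p != storage.end(); ++p)
        new (p) widget(i++);   // construct in place
    for (widget* p = storage.begin(); p != storage.end(); ++p)
        p->~widget();          // destroy in place before the storage goes away
}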
-*/ - -#ifndef __TBB_detail__allocator_traits_H -#define __TBB_detail__allocator_traits_H - -#include "_config.h" -#include "_template_helpers.h" -#include <memory> -#include <type_traits> - -namespace tbb { -namespace detail { -inline namespace d0 { - -#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT -// Struct is_always_equal_detector provides the member type "type" which is -// Allocator::is_always_equal if it is present, std::false_type otherwise -template <typename Allocator, typename = void> -struct is_always_equal_detector { - using type = std::false_type; -}; - -template <typename Allocator> -struct is_always_equal_detector<Allocator, tbb::detail::void_t<typename Allocator::is_always_equal>> -{ - using type = typename Allocator::is_always_equal; -}; -#endif // !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT - -template <typename Allocator> -class allocator_traits : public std::allocator_traits<Allocator> -{ - using base_type = std::allocator_traits<Allocator>; -public: -#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT - using is_always_equal = typename is_always_equal_detector<Allocator>::type; -#endif - - template <typename T> - using rebind_traits = typename tbb::detail::allocator_traits<typename base_type::template rebind_alloc<T>>; -}; // struct allocator_traits - -template <typename Allocator> -void copy_assign_allocators_impl( Allocator& lhs, const Allocator& rhs, /*pocca = */std::true_type ) { - lhs = rhs; -} - -template <typename Allocator> -void copy_assign_allocators_impl( Allocator&, const Allocator&, /*pocca = */ std::false_type ) {} - -// Copy assigns allocators only if propagate_on_container_copy_assignment is true -template <typename Allocator> -void copy_assign_allocators( Allocator& lhs, const Allocator& rhs ) { - using pocca_type = typename allocator_traits<Allocator>::propagate_on_container_copy_assignment; - copy_assign_allocators_impl(lhs, rhs, pocca_type()); -} - -template <typename Allocator> -void move_assign_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocma = */ std::true_type ) { - lhs = std::move(rhs); -} - -template <typename Allocator> -void move_assign_allocators_impl( Allocator&, Allocator&, /*pocma = */ std::false_type ) {} - -// Move assigns allocators only if propagate_on_container_move_assignment is true -template <typename Allocator> -void move_assign_allocators( Allocator& lhs, Allocator& rhs ) { - using pocma_type = typename allocator_traits<Allocator>::propagate_on_container_move_assignment; - move_assign_allocators_impl(lhs, rhs, pocma_type()); -} - -template <typename Allocator> -void swap_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocs = */ std::true_type ) { - using std::swap; - swap(lhs, rhs); -} - -template <typename Allocator> -void swap_allocators_impl( Allocator&, Allocator&, /*pocs = */ std::false_type ) {} - -// Swaps allocators only if propagate_on_container_swap is true -template <typename Allocator> -void swap_allocators( Allocator& lhs, Allocator& rhs ) { - using pocs_type = typename allocator_traits<Allocator>::propagate_on_container_swap; - swap_allocators_impl(lhs, rhs, pocs_type()); -} - -} // inline namespace d0 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__allocator_traits_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
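The copy/move/swap helpers in this header dispatch on the allocator's propagate_on_container_* traits: when the trait is std::true_type the allocators are assigned or swapped, otherwise the call is a no-op. A sketch with a hypothetical tagged allocator:

#include "oneapi/tbb/detail/_allocator_traits.h"
#include <type_traits>
#include <cassert>
#include <cstddef>

template <typename T, bool Propagate>
struct tagged_allocator {
    using value_type = T;
    using propagate_on_container_copy_assignment = std::integral_constant<bool, Propagate>;
    int tag;
    T* allocate(std::size_t n) { return static_cast<T*>(::operator new(n * sizeof(T))); }
    void deallocate(T* p, std::size_t) { ::operator delete(p); }
};

void demo() {
    tagged_allocator<int, true> a1{1}, a2{2};
    tbb::detail::copy_assign_allocators(a1, a2);   // POCCA is true_type  -> a1 becomes a copy of a2
    assert(a1.tag == 2);

    tagged_allocator<int, false> b1{1}, b2{2};
    tbb::detail::copy_assign_allocators(b1, b2);   // POCCA is false_type -> b1 is left unchanged
    assert(b1.tag == 1);
}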
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__allocator_traits_H +#define __TBB_detail__allocator_traits_H + +#include "_config.h" +#include "_template_helpers.h" +#include <memory> +#include <type_traits> + +namespace tbb { +namespace detail { +inline namespace d0 { + +#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT +// Struct is_always_equal_detector provides the member type "type" which is +// Allocator::is_always_equal if it is present, std::false_type otherwise +template <typename Allocator, typename = void> +struct is_always_equal_detector { + using type = std::false_type; +}; + +template <typename Allocator> +struct is_always_equal_detector<Allocator, tbb::detail::void_t<typename Allocator::is_always_equal>> +{ + using type = typename Allocator::is_always_equal; +}; +#endif // !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT + +template <typename Allocator> +class allocator_traits : public std::allocator_traits<Allocator> +{ + using base_type = std::allocator_traits<Allocator>; +public: +#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT + using is_always_equal = typename is_always_equal_detector<Allocator>::type; +#endif + + template <typename T> + using rebind_traits = typename tbb::detail::allocator_traits<typename base_type::template rebind_alloc<T>>; +}; // struct allocator_traits + +template <typename Allocator> +void copy_assign_allocators_impl( Allocator& lhs, const Allocator& rhs, /*pocca = */std::true_type ) { + lhs = rhs; +} + +template <typename Allocator> +void copy_assign_allocators_impl( Allocator&, const Allocator&, /*pocca = */ std::false_type ) {} + +// Copy assigns allocators only if propagate_on_container_copy_assignment is true +template <typename Allocator> +void copy_assign_allocators( Allocator& lhs, const Allocator& rhs ) { + using pocca_type = typename allocator_traits<Allocator>::propagate_on_container_copy_assignment; + copy_assign_allocators_impl(lhs, rhs, pocca_type()); +} + +template <typename Allocator> +void move_assign_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocma = */ std::true_type ) { + lhs = std::move(rhs); +} + +template <typename Allocator> +void move_assign_allocators_impl( Allocator&, Allocator&, /*pocma = */ std::false_type ) {} + +// Move assigns allocators only if propagate_on_container_move_assignment is true +template <typename Allocator> +void move_assign_allocators( Allocator& lhs, Allocator& rhs ) { + using pocma_type = typename allocator_traits<Allocator>::propagate_on_container_move_assignment; + move_assign_allocators_impl(lhs, rhs, pocma_type()); +} + +template <typename Allocator> +void swap_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocs = */ std::true_type ) { + using std::swap; + swap(lhs, rhs); +} + +template <typename Allocator> +void swap_allocators_impl( Allocator&, Allocator&, /*pocs = */ std::false_type ) {} + +// Swaps allocators only if propagate_on_container_swap is true +template <typename Allocator> +void swap_allocators( Allocator& lhs, Allocator& rhs ) { + using pocs_type = typename allocator_traits<Allocator>::propagate_on_container_swap; + swap_allocators_impl(lhs, rhs, pocs_type()); +} + +} // inline 
namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__allocator_traits_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h index 4116386a92..d89e1178c8 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h @@ -1,52 +1,52 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__assert_H -#define __TBB_detail__assert_H - -#include "_config.h" - -namespace tbb { -namespace detail { -namespace r1 { -//! Process an assertion failure. -/** Normally called from __TBB_ASSERT macro. - If assertion handler is null, print message for assertion failure and abort. - Otherwise call the assertion handler. */ -void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment); -} // namespace r1 -} // namespace detail -} // namespace tbb - -//! Release version of assertions -#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__FILE__,__LINE__,#predicate,message)) - -#if TBB_USE_ASSERT - //! Assert that predicate is true. - /** If predicate is false, print assertion failure message. - If the comment argument is not NULL, it is printed as part of the failure message. - The comment argument has no other effect. */ - #define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message) - //! "Extended" version - #define __TBB_ASSERT_EX __TBB_ASSERT -#else - //! No-op version of __TBB_ASSERT. - #define __TBB_ASSERT(predicate,comment) ((void)0) - //! "Extended" version is useful to suppress warnings if a variable is only used with an assert - #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) -#endif // TBB_USE_ASSERT - -#endif // __TBB_detail__assert_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__assert_H +#define __TBB_detail__assert_H + +#include "_config.h" + +namespace tbb { +namespace detail { +namespace r1 { +//! Process an assertion failure. +/** Normally called from __TBB_ASSERT macro. + If assertion handler is null, print message for assertion failure and abort. + Otherwise call the assertion handler. 
*/ +void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment); +} // namespace r1 +} // namespace detail +} // namespace tbb + +//! Release version of assertions +#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__FILE__,__LINE__,#predicate,message)) + +#if TBB_USE_ASSERT + //! Assert that predicate is true. + /** If predicate is false, print assertion failure message. + If the comment argument is not NULL, it is printed as part of the failure message. + The comment argument has no other effect. */ + #define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message) + //! "Extended" version + #define __TBB_ASSERT_EX __TBB_ASSERT +#else + //! No-op version of __TBB_ASSERT. + #define __TBB_ASSERT(predicate,comment) ((void)0) + //! "Extended" version is useful to suppress warnings if a variable is only used with an assert + #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) +#endif // TBB_USE_ASSERT + +#endif // __TBB_detail__assert_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h index 6289632601..d8df4d5c3f 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h @@ -1,659 +1,659 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__concurrent_queue_base_H -#define __TBB_detail__concurrent_queue_base_H - -#include "_utils.h" -#include "_exception.h" -#include "_machine.h" -#include "_allocator_traits.h" - -#include "../profiling.h" -#include "../spin_mutex.h" -#include "../cache_aligned_allocator.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace d1 { - -using ticket_type = std::size_t; - -template <typename Page> -inline bool is_valid_page(const Page p) { - return reinterpret_cast<std::uintptr_t>(p) > 1; -} - -template <typename T, typename Allocator> -struct concurrent_queue_rep; - -template <typename Container, typename T, typename Allocator> -class micro_queue_pop_finalizer; - -#if _MSC_VER && !defined(__INTEL_COMPILER) -// unary minus operator applied to unsigned type, result still unsigned -#pragma warning( push ) -#pragma warning( disable: 4146 ) -#endif - -// A queue using simple locking. -// For efficiency, this class has no constructor. -// The caller is expected to zero-initialize it. 
-template <typename T, typename Allocator> -class micro_queue { -private: - using queue_rep_type = concurrent_queue_rep<T, Allocator>; - using self_type = micro_queue<T, Allocator>; -public: - using size_type = std::size_t; - using value_type = T; - using reference = value_type&; - using const_reference = const value_type&; - - using allocator_type = Allocator; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - - static constexpr size_type item_size = sizeof(T); - static constexpr size_type items_per_page = item_size <= 8 ? 32 : - item_size <= 16 ? 16 : - item_size <= 32 ? 8 : - item_size <= 64 ? 4 : - item_size <= 128 ? 2 : 1; - - struct padded_page { - padded_page() {} - ~padded_page() {} - - reference operator[] (std::size_t index) { - __TBB_ASSERT(index < items_per_page, "Index out of range"); - return items[index]; - } - - const_reference operator[] (std::size_t index) const { - __TBB_ASSERT(index < items_per_page, "Index out of range"); - return items[index]; - } - - padded_page* next{ nullptr }; - std::atomic<std::uintptr_t> mask{}; - - union { - value_type items[items_per_page]; - }; - }; // struct padded_page - - using page_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_page>; -protected: - using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; - -public: - using item_constructor_type = void (*)(value_type* location, const void* src); - micro_queue() = default; - micro_queue( const micro_queue& ) = delete; - micro_queue& operator=( const micro_queue& ) = delete; - - size_type prepare_page( ticket_type k, queue_rep_type& base, page_allocator_type page_allocator, - padded_page*& p ) { - __TBB_ASSERT(p == nullptr, "Invalid page argument for prepare_page"); - k &= -queue_rep_type::n_queue; - size_type index = modulo_power_of_two(k / queue_rep_type::n_queue, items_per_page); - if (!index) { - try_call( [&] { - p = page_allocator_traits::allocate(page_allocator, 1); - }).on_exception( [&] { - ++base.n_invalid_entries; - invalidate_page( k ); - }); - page_allocator_traits::construct(page_allocator, p); - } - - if (tail_counter.load(std::memory_order_relaxed) != k) spin_wait_until_my_turn(tail_counter, k, base); - call_itt_notify(acquired, &tail_counter); - - if (p) { - spin_mutex::scoped_lock lock( page_mutex ); - padded_page* q = tail_page.load(std::memory_order_relaxed); - if (is_valid_page(q)) { - q->next = p; - } else { - head_page.store(p, std::memory_order_relaxed); - } - tail_page.store(p, std::memory_order_relaxed);; - } else { - p = tail_page.load(std::memory_order_acquire); // TODO may be relaxed ? - } - return index; - } - - template<typename... Args> - void push( ticket_type k, queue_rep_type& base, Args&&... args ) - { - padded_page* p = nullptr; - page_allocator_type page_allocator(base.get_allocator()); - size_type index = prepare_page(k, base, page_allocator, p); - __TBB_ASSERT(p != nullptr, "Page was not prepared"); - - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard([&] { - ++base.n_invalid_entries; - call_itt_notify(releasing, &tail_counter); - tail_counter.fetch_add(queue_rep_type::n_queue); - }); - - page_allocator_traits::construct(page_allocator, &(*p)[index], std::forward<Args>(args)...); - // If no exception was thrown, mark item as present. 
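The items_per_page policy above simply caps the payload of one page at roughly 256 bytes: 32 slots for items of up to 8 bytes, 16 slots up to 16 bytes, and so on down to a single slot for anything over 128 bytes. A compile-time restatement of that table (sketch only, not the header's code):

#include <cstddef>

template <std::size_t ItemSize>
constexpr std::size_t items_per_page_for() {
    return ItemSize <= 8  ? 32 :
           ItemSize <= 16 ? 16 :
           ItemSize <= 32 ?  8 :
           ItemSize <= 64 ?  4 :
           ItemSize <= 128 ? 2 : 1;
}

static_assert(items_per_page_for<1>()   == 32, "small items: 32 per page");
static_assert(items_per_page_for<64>()  == 4,  "64-byte items: 4 per page");
static_assert(items_per_page_for<256>() == 1,  "large items: 1 per page");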
- p->mask.store(p->mask.load(std::memory_order_relaxed) | uintptr_t(1) << index, std::memory_order_relaxed); - call_itt_notify(releasing, &tail_counter); - - value_guard.dismiss(); - tail_counter.fetch_add(queue_rep_type::n_queue); - } - - void abort_push( ticket_type k, queue_rep_type& base) { - padded_page* p = nullptr; - prepare_page(k, base, base.get_allocator(), p); - ++base.n_invalid_entries; - tail_counter.fetch_add(queue_rep_type::n_queue); - } - - bool pop( void* dst, ticket_type k, queue_rep_type& base ) { - k &= -queue_rep_type::n_queue; - if (head_counter.load(std::memory_order_relaxed) != k) spin_wait_until_eq(head_counter, k); - call_itt_notify(acquired, &head_counter); - if (tail_counter.load(std::memory_order_relaxed) == k) spin_wait_while_eq(tail_counter, k); - call_itt_notify(acquired, &tail_counter); - padded_page *p = head_page.load(std::memory_order_acquire); - __TBB_ASSERT( p, nullptr ); - size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page ); - bool success = false; - { - page_allocator_type page_allocator(base.get_allocator()); - micro_queue_pop_finalizer<self_type, value_type, page_allocator_type> finalizer(*this, page_allocator, - k + queue_rep_type::n_queue, index == items_per_page - 1 ? p : nullptr ); - if (p->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) { - success = true; - assign_and_destroy_item( dst, *p, index ); - } else { - --base.n_invalid_entries; - } - } - return success; - } - - micro_queue& assign( const micro_queue& src, queue_rep_type& base, - item_constructor_type construct_item ) - { - head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); - tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); - - const padded_page* srcp = src.head_page.load(std::memory_order_relaxed); - if( is_valid_page(srcp) ) { - ticket_type g_index = head_counter.load(std::memory_order_relaxed); - size_type n_items = (tail_counter.load(std::memory_order_relaxed) - head_counter.load(std::memory_order_relaxed)) - / queue_rep_type::n_queue; - size_type index = modulo_power_of_two(head_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); - size_type end_in_first_page = (index+n_items < items_per_page) ? 
(index + n_items) : items_per_page; - - try_call( [&] { - head_page.store(make_copy(base, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed); - }).on_exception( [&] { - head_counter.store(0, std::memory_order_relaxed); - tail_counter.store(0, std::memory_order_relaxed); - }); - padded_page* cur_page = head_page.load(std::memory_order_relaxed); - - try_call( [&] { - if (srcp != src.tail_page.load(std::memory_order_relaxed)) { - for (srcp = srcp->next; srcp != src.tail_page.load(std::memory_order_relaxed); srcp=srcp->next ) { - cur_page->next = make_copy( base, srcp, 0, items_per_page, g_index, construct_item ); - cur_page = cur_page->next; - } - - __TBB_ASSERT(srcp == src.tail_page.load(std::memory_order_relaxed), nullptr ); - size_type last_index = modulo_power_of_two(tail_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); - if( last_index==0 ) last_index = items_per_page; - - cur_page->next = make_copy( base, srcp, 0, last_index, g_index, construct_item ); - cur_page = cur_page->next; - } - tail_page.store(cur_page, std::memory_order_relaxed); - }).on_exception( [&] { - padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); - tail_page.store(invalid_page, std::memory_order_relaxed); - }); - } else { - head_page.store(nullptr, std::memory_order_relaxed); - tail_page.store(nullptr, std::memory_order_relaxed); - } - return *this; - } - - padded_page* make_copy( queue_rep_type& base, const padded_page* src_page, size_type begin_in_page, - size_type end_in_page, ticket_type& g_index, item_constructor_type construct_item ) - { - page_allocator_type page_allocator(base.get_allocator()); - padded_page* new_page = page_allocator_traits::allocate(page_allocator, 1); - new_page->next = nullptr; - new_page->mask.store(src_page->mask.load(std::memory_order_relaxed), std::memory_order_relaxed); - for (; begin_in_page!=end_in_page; ++begin_in_page, ++g_index) { - if (new_page->mask.load(std::memory_order_relaxed) & uintptr_t(1) << begin_in_page) { - copy_item(*new_page, begin_in_page, *src_page, begin_in_page, construct_item); - } - } - return new_page; - } - - void invalidate_page( ticket_type k ) { - // Append an invalid page at address 1 so that no more pushes are allowed. 
- padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); - { - spin_mutex::scoped_lock lock( page_mutex ); - tail_counter.store(k + queue_rep_type::n_queue + 1, std::memory_order_relaxed); - padded_page* q = tail_page.load(std::memory_order_relaxed); - if (is_valid_page(q)) { - q->next = invalid_page; - } else { - head_page.store(invalid_page, std::memory_order_relaxed); - } - tail_page.store(invalid_page, std::memory_order_relaxed); - } - } - - padded_page* get_tail_page() { - return tail_page.load(std::memory_order_relaxed); - } - - padded_page* get_head_page() { - return head_page.load(std::memory_order_relaxed); - } - - void set_tail_page( padded_page* pg ) { - tail_page.store(pg, std::memory_order_relaxed); - } - - void clear(queue_rep_type& base) { - padded_page* curr_page = head_page.load(std::memory_order_relaxed); - std::size_t index = head_counter.load(std::memory_order_relaxed); - page_allocator_type page_allocator(base.get_allocator()); - - while (curr_page) { - for (; index != items_per_page - 1; ++index) { - curr_page->operator[](index).~value_type(); - } - padded_page* next_page = curr_page->next; - page_allocator_traits::destroy(page_allocator, curr_page); - page_allocator_traits::deallocate(page_allocator, curr_page, 1); - curr_page = next_page; - } - - padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); - head_page.store(invalid_page, std::memory_order_relaxed); - tail_page.store(invalid_page, std::memory_order_relaxed); - } - -private: - // template <typename U, typename A> - friend class micro_queue_pop_finalizer<self_type, value_type, page_allocator_type>; - - // Class used to ensure exception-safety of method "pop" - class destroyer { - value_type& my_value; - public: - destroyer( reference value ) : my_value(value) {} - destroyer( const destroyer& ) = delete; - destroyer& operator=( const destroyer& ) = delete; - ~destroyer() {my_value.~T();} - }; // class destroyer - - void copy_item( padded_page& dst, size_type dindex, const padded_page& src, size_type sindex, - item_constructor_type construct_item ) - { - auto& src_item = src[sindex]; - construct_item( &dst[dindex], static_cast<const void*>(&src_item) ); - } - - void assign_and_destroy_item( void* dst, padded_page& src, size_type index ) { - auto& from = src[index]; - destroyer d(from); - *static_cast<T*>(dst) = std::move(from); - } - - void spin_wait_until_my_turn( std::atomic<ticket_type>& counter, ticket_type k, queue_rep_type& rb ) const { - for (atomic_backoff b(true);; b.pause()) { - ticket_type c = counter; - if (c == k) return; - else if (c & 1) { - ++rb.n_invalid_entries; - throw_exception( exception_id::bad_last_alloc); - } - } - } - - std::atomic<padded_page*> head_page{}; - std::atomic<ticket_type> head_counter{}; - - std::atomic<padded_page*> tail_page{}; - std::atomic<ticket_type> tail_counter{}; - - spin_mutex page_mutex{}; -}; // class micro_queue - -#if _MSC_VER && !defined(__INTEL_COMPILER) -#pragma warning( pop ) -#endif // warning 4146 is back - -template <typename Container, typename T, typename Allocator> -class micro_queue_pop_finalizer { -public: - using padded_page = typename Container::padded_page; - using allocator_type = Allocator; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - - micro_queue_pop_finalizer( Container& queue, Allocator& alloc, ticket_type k, padded_page* p ) : - my_ticket_type(k), my_queue(queue), my_page(p), allocator(alloc) - {} - - micro_queue_pop_finalizer( const 
micro_queue_pop_finalizer& ) = delete; - micro_queue_pop_finalizer& operator=( const micro_queue_pop_finalizer& ) = delete; - - ~micro_queue_pop_finalizer() { - padded_page* p = my_page; - if( is_valid_page(p) ) { - spin_mutex::scoped_lock lock( my_queue.page_mutex ); - padded_page* q = p->next; - my_queue.head_page.store(q, std::memory_order_relaxed); - if( !is_valid_page(q) ) { - my_queue.tail_page.store(nullptr, std::memory_order_relaxed); - } - } - my_queue.head_counter.store(my_ticket_type, std::memory_order_relaxed); - if ( is_valid_page(p) ) { - allocator_traits_type::destroy(allocator, static_cast<padded_page*>(p)); - allocator_traits_type::deallocate(allocator, static_cast<padded_page*>(p), 1); - } - } -private: - ticket_type my_ticket_type; - Container& my_queue; - padded_page* my_page; - Allocator& allocator; -}; // class micro_queue_pop_finalizer - -#if _MSC_VER && !defined(__INTEL_COMPILER) -// structure was padded due to alignment specifier -#pragma warning( push ) -#pragma warning( disable: 4324 ) -#endif - -template <typename T, typename Allocator> -struct concurrent_queue_rep { - using self_type = concurrent_queue_rep<T, Allocator>; - using size_type = std::size_t; - using micro_queue_type = micro_queue<T, Allocator>; - using allocator_type = Allocator; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - using padded_page = typename micro_queue_type::padded_page; - using page_allocator_type = typename micro_queue_type::page_allocator_type; - using item_constructor_type = typename micro_queue_type::item_constructor_type; -private: - using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; - using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<self_type>; - -public: - // must be power of 2 - static constexpr size_type n_queue = 8; - // Approximately n_queue/golden ratio - static constexpr size_type phi = 3; - static constexpr size_type item_size = micro_queue_type::item_size; - static constexpr size_type items_per_page = micro_queue_type::items_per_page; - - concurrent_queue_rep( queue_allocator_type& alloc ) : my_queue_allocator(alloc) - {} - - concurrent_queue_rep( const concurrent_queue_rep& ) = delete; - concurrent_queue_rep& operator=( const concurrent_queue_rep& ) = delete; - - void clear() { - page_allocator_type page_allocator(my_queue_allocator); - for (size_type i = 0; i < n_queue; ++i) { - padded_page* tail_page = array[i].get_tail_page(); - if( is_valid_page(tail_page) ) { - __TBB_ASSERT(array[i].get_head_page() == tail_page, "at most one page should remain" ); - page_allocator_traits::destroy(page_allocator, static_cast<padded_page*>(tail_page)); - page_allocator_traits::deallocate(page_allocator, static_cast<padded_page*>(tail_page), 1); - array[i].set_tail_page(nullptr); - } else { - __TBB_ASSERT(!is_valid_page(array[i].get_head_page()), "head page pointer corrupt?"); - } - } - } - - void assign( const concurrent_queue_rep& src, item_constructor_type construct_item ) { - head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); - tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); - n_invalid_entries.store(src.n_invalid_entries.load(std::memory_order_relaxed), std::memory_order_relaxed); - - // copy or move micro_queues - size_type queue_idx = 0; - try_call( [&] { - for (; queue_idx < n_queue; ++queue_idx) { - array[queue_idx].assign(src.array[queue_idx], *this, construct_item); - } - 
}).on_exception( [&] { - for (size_type i = 0; i < queue_idx + 1; ++i) { - array[i].clear(*this); - } - head_counter.store(0, std::memory_order_relaxed); - tail_counter.store(0, std::memory_order_relaxed); - n_invalid_entries.store(0, std::memory_order_relaxed); - }); - - __TBB_ASSERT(head_counter.load(std::memory_order_relaxed) == src.head_counter.load(std::memory_order_relaxed) && - tail_counter.load(std::memory_order_relaxed) == src.tail_counter.load(std::memory_order_relaxed), - "the source concurrent queue should not be concurrently modified." ); - } - - bool empty() const { - ticket_type tc = tail_counter.load(std::memory_order_acquire); - ticket_type hc = head_counter.load(std::memory_order_relaxed); - // if tc!=r.tail_counter, the queue was not empty at some point between the two reads. - return tc == tail_counter.load(std::memory_order_relaxed) && - std::ptrdiff_t(tc - hc - n_invalid_entries.load(std::memory_order_relaxed)) <= 0; - } - - std::ptrdiff_t size() const { - __TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), NULL); - std::ptrdiff_t hc = head_counter.load(std::memory_order_acquire); - std::ptrdiff_t tc = tail_counter.load(std::memory_order_relaxed); - std::ptrdiff_t nie = n_invalid_entries.load(std::memory_order_relaxed); - - return tc - hc - nie; - } - - queue_allocator_type& get_allocator() { - return my_queue_allocator; - } - - friend class micro_queue<T, Allocator>; - - // Map ticket_type to an array index - static size_type index( ticket_type k ) { - return k * phi % n_queue; - } - - micro_queue_type& choose( ticket_type k ) { - // The formula here approximates LRU in a cache-oblivious way. - return array[index(k)]; - } - - alignas(max_nfs_size) micro_queue_type array[n_queue]; - - alignas(max_nfs_size) std::atomic<ticket_type> head_counter{}; - alignas(max_nfs_size) std::atomic<ticket_type> tail_counter{}; - alignas(max_nfs_size) std::atomic<size_type> n_invalid_entries{}; - queue_allocator_type& my_queue_allocator; -}; // class concurrent_queue_rep - -#if _MSC_VER && !defined(__INTEL_COMPILER) -#pragma warning( pop ) -#endif - -template <typename Value, typename Allocator> -class concurrent_queue_iterator_base { - using queue_rep_type = concurrent_queue_rep<Value, Allocator>; - using padded_page = typename queue_rep_type::padded_page; -protected: - concurrent_queue_iterator_base() = default; - - concurrent_queue_iterator_base( const concurrent_queue_iterator_base& other ) { - assign(other); - } - - concurrent_queue_iterator_base( queue_rep_type* queue_rep ) - : my_queue_rep(queue_rep), - my_head_counter(my_queue_rep->head_counter.load(std::memory_order_relaxed)) - { - for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { - my_array[i] = my_queue_rep->array[i].get_head_page(); - } - - if (!get_item(my_item, my_head_counter)) advance(); - } - - void assign( const concurrent_queue_iterator_base& other ) { - my_item = other.my_item; - my_queue_rep = other.my_queue_rep; - - if (my_queue_rep != nullptr) { - my_head_counter = other.my_head_counter; - - for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { - my_array[i] = other.my_array[i]; - } - } - } - - void advance() { - __TBB_ASSERT(my_item, "Attempt to increment iterator past end of the queue"); - std::size_t k = my_head_counter; -#if TBB_USE_ASSERT - Value* tmp; - get_item(tmp, k); - __TBB_ASSERT(my_item == tmp, nullptr); -#endif - std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); - if (i == my_queue_rep->items_per_page - 1) { - padded_page*& root = 
my_array[queue_rep_type::index(k)]; - root = root->next; - } - // Advance k - my_head_counter = ++k; - if (!get_item(my_item, k)) advance(); - } - - concurrent_queue_iterator_base& operator=( const concurrent_queue_iterator_base& other ) { - this->assign(other); - return *this; - } - - bool get_item( Value*& item, std::size_t k ) { - if (k == my_queue_rep->tail_counter.load(std::memory_order_relaxed)) { - item = nullptr; - return true; - } else { - padded_page* p = my_array[queue_rep_type::index(k)]; - __TBB_ASSERT(p, nullptr); - std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); - item = &(*p)[i]; - return (p->mask & uintptr_t(1) << i) != 0; - } - } - - Value* my_item{ nullptr }; - queue_rep_type* my_queue_rep{ nullptr }; - ticket_type my_head_counter{}; - padded_page* my_array[queue_rep_type::n_queue]; -}; // class concurrent_queue_iterator_base - -struct concurrent_queue_iterator_provider { - template <typename Iterator, typename Container> - static Iterator get( const Container& container ) { - return Iterator(container); - } -}; // struct concurrent_queue_iterator_provider - -template <typename Container, typename Value, typename Allocator> -class concurrent_queue_iterator : public concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator> { - using base_type = concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator>; -public: - using value_type = Value; - using pointer = value_type*; - using reference = value_type&; - using difference_type = std::ptrdiff_t; - using iterator_category = std::forward_iterator_tag; - - concurrent_queue_iterator() = default; - - /** If Value==Container::value_type, then this routine is the copy constructor. - If Value==const Container::value_type, then this routine is a conversion constructor. */ - concurrent_queue_iterator( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) - : base_type(other) {} - -private: - concurrent_queue_iterator( const Container& container ) - : base_type(container.my_queue_representation) {} -public: - concurrent_queue_iterator& operator=( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) { - this->assign(other); - return *this; - } - - reference operator*() const { - return *static_cast<pointer>(this->my_item); - } - - pointer operator->() const { return &operator*(); } - - concurrent_queue_iterator& operator++() { - this->advance(); - return *this; - } - - concurrent_queue_iterator operator++(int) { - concurrent_queue_iterator tmp = *this; - ++*this; - return tmp; - } - - friend bool operator==( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { - return lhs.my_item == rhs.my_item; - } - - friend bool operator!=( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { - return lhs.my_item != rhs.my_item; - } -private: - friend struct concurrent_queue_iterator_provider; -}; // class concurrent_queue_iterator - -} // namespace d1 -} // namespace detail -} // tbb - -#endif // __TBB_detail__concurrent_queue_base_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__concurrent_queue_base_H +#define __TBB_detail__concurrent_queue_base_H + +#include "_utils.h" +#include "_exception.h" +#include "_machine.h" +#include "_allocator_traits.h" + +#include "../profiling.h" +#include "../spin_mutex.h" +#include "../cache_aligned_allocator.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +using ticket_type = std::size_t; + +template <typename Page> +inline bool is_valid_page(const Page p) { + return reinterpret_cast<std::uintptr_t>(p) > 1; +} + +template <typename T, typename Allocator> +struct concurrent_queue_rep; + +template <typename Container, typename T, typename Allocator> +class micro_queue_pop_finalizer; + +#if _MSC_VER && !defined(__INTEL_COMPILER) +// unary minus operator applied to unsigned type, result still unsigned +#pragma warning( push ) +#pragma warning( disable: 4146 ) +#endif + +// A queue using simple locking. +// For efficiency, this class has no constructor. +// The caller is expected to zero-initialize it. +template <typename T, typename Allocator> +class micro_queue { +private: + using queue_rep_type = concurrent_queue_rep<T, Allocator>; + using self_type = micro_queue<T, Allocator>; +public: + using size_type = std::size_t; + using value_type = T; + using reference = value_type&; + using const_reference = const value_type&; + + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + + static constexpr size_type item_size = sizeof(T); + static constexpr size_type items_per_page = item_size <= 8 ? 32 : + item_size <= 16 ? 16 : + item_size <= 32 ? 8 : + item_size <= 64 ? 4 : + item_size <= 128 ? 
2 : 1; + + struct padded_page { + padded_page() {} + ~padded_page() {} + + reference operator[] (std::size_t index) { + __TBB_ASSERT(index < items_per_page, "Index out of range"); + return items[index]; + } + + const_reference operator[] (std::size_t index) const { + __TBB_ASSERT(index < items_per_page, "Index out of range"); + return items[index]; + } + + padded_page* next{ nullptr }; + std::atomic<std::uintptr_t> mask{}; + + union { + value_type items[items_per_page]; + }; + }; // struct padded_page + + using page_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_page>; +protected: + using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; + +public: + using item_constructor_type = void (*)(value_type* location, const void* src); + micro_queue() = default; + micro_queue( const micro_queue& ) = delete; + micro_queue& operator=( const micro_queue& ) = delete; + + size_type prepare_page( ticket_type k, queue_rep_type& base, page_allocator_type page_allocator, + padded_page*& p ) { + __TBB_ASSERT(p == nullptr, "Invalid page argument for prepare_page"); + k &= -queue_rep_type::n_queue; + size_type index = modulo_power_of_two(k / queue_rep_type::n_queue, items_per_page); + if (!index) { + try_call( [&] { + p = page_allocator_traits::allocate(page_allocator, 1); + }).on_exception( [&] { + ++base.n_invalid_entries; + invalidate_page( k ); + }); + page_allocator_traits::construct(page_allocator, p); + } + + if (tail_counter.load(std::memory_order_relaxed) != k) spin_wait_until_my_turn(tail_counter, k, base); + call_itt_notify(acquired, &tail_counter); + + if (p) { + spin_mutex::scoped_lock lock( page_mutex ); + padded_page* q = tail_page.load(std::memory_order_relaxed); + if (is_valid_page(q)) { + q->next = p; + } else { + head_page.store(p, std::memory_order_relaxed); + } + tail_page.store(p, std::memory_order_relaxed);; + } else { + p = tail_page.load(std::memory_order_acquire); // TODO may be relaxed ? + } + return index; + } + + template<typename... Args> + void push( ticket_type k, queue_rep_type& base, Args&&... args ) + { + padded_page* p = nullptr; + page_allocator_type page_allocator(base.get_allocator()); + size_type index = prepare_page(k, base, page_allocator, p); + __TBB_ASSERT(p != nullptr, "Page was not prepared"); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + ++base.n_invalid_entries; + call_itt_notify(releasing, &tail_counter); + tail_counter.fetch_add(queue_rep_type::n_queue); + }); + + page_allocator_traits::construct(page_allocator, &(*p)[index], std::forward<Args>(args)...); + // If no exception was thrown, mark item as present. 
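The capacity ladder for items_per_page above packs more elements onto a page the smaller the element type is (32 slots for items up to 8 bytes, down to a single slot past 128 bytes). A minimal sketch that mirrors the same selection and checks it with static_asserts; the 40- and 256-byte sizes are hypothetical examples, not types from this header:

#include <cstddef>

// Mirrors the items_per_page ladder of micro_queue (illustrative copy, not the library's symbol).
constexpr std::size_t items_per_page_for(std::size_t item_size) {
    return item_size <= 8   ? 32 :
           item_size <= 16  ? 16 :
           item_size <= 32  ?  8 :
           item_size <= 64  ?  4 :
           item_size <= 128 ?  2 : 1;
}

static_assert(items_per_page_for(sizeof(int)) == 32, "small items: 32 slots per page");
static_assert(items_per_page_for(40)  == 4, "a 40-byte item: 4 slots per page");
static_assert(items_per_page_for(256) == 1, "oversized items: one slot per page");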
+ p->mask.store(p->mask.load(std::memory_order_relaxed) | uintptr_t(1) << index, std::memory_order_relaxed); + call_itt_notify(releasing, &tail_counter); + + value_guard.dismiss(); + tail_counter.fetch_add(queue_rep_type::n_queue); + } + + void abort_push( ticket_type k, queue_rep_type& base) { + padded_page* p = nullptr; + prepare_page(k, base, base.get_allocator(), p); + ++base.n_invalid_entries; + tail_counter.fetch_add(queue_rep_type::n_queue); + } + + bool pop( void* dst, ticket_type k, queue_rep_type& base ) { + k &= -queue_rep_type::n_queue; + if (head_counter.load(std::memory_order_relaxed) != k) spin_wait_until_eq(head_counter, k); + call_itt_notify(acquired, &head_counter); + if (tail_counter.load(std::memory_order_relaxed) == k) spin_wait_while_eq(tail_counter, k); + call_itt_notify(acquired, &tail_counter); + padded_page *p = head_page.load(std::memory_order_acquire); + __TBB_ASSERT( p, nullptr ); + size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page ); + bool success = false; + { + page_allocator_type page_allocator(base.get_allocator()); + micro_queue_pop_finalizer<self_type, value_type, page_allocator_type> finalizer(*this, page_allocator, + k + queue_rep_type::n_queue, index == items_per_page - 1 ? p : nullptr ); + if (p->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) { + success = true; + assign_and_destroy_item( dst, *p, index ); + } else { + --base.n_invalid_entries; + } + } + return success; + } + + micro_queue& assign( const micro_queue& src, queue_rep_type& base, + item_constructor_type construct_item ) + { + head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + + const padded_page* srcp = src.head_page.load(std::memory_order_relaxed); + if( is_valid_page(srcp) ) { + ticket_type g_index = head_counter.load(std::memory_order_relaxed); + size_type n_items = (tail_counter.load(std::memory_order_relaxed) - head_counter.load(std::memory_order_relaxed)) + / queue_rep_type::n_queue; + size_type index = modulo_power_of_two(head_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); + size_type end_in_first_page = (index+n_items < items_per_page) ? 
(index + n_items) : items_per_page; + + try_call( [&] { + head_page.store(make_copy(base, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed); + }).on_exception( [&] { + head_counter.store(0, std::memory_order_relaxed); + tail_counter.store(0, std::memory_order_relaxed); + }); + padded_page* cur_page = head_page.load(std::memory_order_relaxed); + + try_call( [&] { + if (srcp != src.tail_page.load(std::memory_order_relaxed)) { + for (srcp = srcp->next; srcp != src.tail_page.load(std::memory_order_relaxed); srcp=srcp->next ) { + cur_page->next = make_copy( base, srcp, 0, items_per_page, g_index, construct_item ); + cur_page = cur_page->next; + } + + __TBB_ASSERT(srcp == src.tail_page.load(std::memory_order_relaxed), nullptr ); + size_type last_index = modulo_power_of_two(tail_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); + if( last_index==0 ) last_index = items_per_page; + + cur_page->next = make_copy( base, srcp, 0, last_index, g_index, construct_item ); + cur_page = cur_page->next; + } + tail_page.store(cur_page, std::memory_order_relaxed); + }).on_exception( [&] { + padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + tail_page.store(invalid_page, std::memory_order_relaxed); + }); + } else { + head_page.store(nullptr, std::memory_order_relaxed); + tail_page.store(nullptr, std::memory_order_relaxed); + } + return *this; + } + + padded_page* make_copy( queue_rep_type& base, const padded_page* src_page, size_type begin_in_page, + size_type end_in_page, ticket_type& g_index, item_constructor_type construct_item ) + { + page_allocator_type page_allocator(base.get_allocator()); + padded_page* new_page = page_allocator_traits::allocate(page_allocator, 1); + new_page->next = nullptr; + new_page->mask.store(src_page->mask.load(std::memory_order_relaxed), std::memory_order_relaxed); + for (; begin_in_page!=end_in_page; ++begin_in_page, ++g_index) { + if (new_page->mask.load(std::memory_order_relaxed) & uintptr_t(1) << begin_in_page) { + copy_item(*new_page, begin_in_page, *src_page, begin_in_page, construct_item); + } + } + return new_page; + } + + void invalidate_page( ticket_type k ) { + // Append an invalid page at address 1 so that no more pushes are allowed. 
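Stepping back from the page-invalidation path for a moment: all of this per-page machinery sits behind a much simpler public interface. A hedged usage sketch of the unbounded queue built on it (assuming a standard oneTBB installation; tbb::concurrent_queue is the public class, declared elsewhere, not in this header):

#include <oneapi/tbb/concurrent_queue.h>
#include <iostream>

int main() {
    tbb::concurrent_queue<int> q;        // unbounded, multiple producers and consumers allowed
    for (int i = 0; i < 4; ++i) q.push(i);

    int value = 0;
    while (q.try_pop(value))             // try_pop returns false once the queue is empty
        std::cout << value << ' ';
    std::cout << '\n';
    return 0;
}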
+ padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + { + spin_mutex::scoped_lock lock( page_mutex ); + tail_counter.store(k + queue_rep_type::n_queue + 1, std::memory_order_relaxed); + padded_page* q = tail_page.load(std::memory_order_relaxed); + if (is_valid_page(q)) { + q->next = invalid_page; + } else { + head_page.store(invalid_page, std::memory_order_relaxed); + } + tail_page.store(invalid_page, std::memory_order_relaxed); + } + } + + padded_page* get_tail_page() { + return tail_page.load(std::memory_order_relaxed); + } + + padded_page* get_head_page() { + return head_page.load(std::memory_order_relaxed); + } + + void set_tail_page( padded_page* pg ) { + tail_page.store(pg, std::memory_order_relaxed); + } + + void clear(queue_rep_type& base) { + padded_page* curr_page = head_page.load(std::memory_order_relaxed); + std::size_t index = head_counter.load(std::memory_order_relaxed); + page_allocator_type page_allocator(base.get_allocator()); + + while (curr_page) { + for (; index != items_per_page - 1; ++index) { + curr_page->operator[](index).~value_type(); + } + padded_page* next_page = curr_page->next; + page_allocator_traits::destroy(page_allocator, curr_page); + page_allocator_traits::deallocate(page_allocator, curr_page, 1); + curr_page = next_page; + } + + padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + head_page.store(invalid_page, std::memory_order_relaxed); + tail_page.store(invalid_page, std::memory_order_relaxed); + } + +private: + // template <typename U, typename A> + friend class micro_queue_pop_finalizer<self_type, value_type, page_allocator_type>; + + // Class used to ensure exception-safety of method "pop" + class destroyer { + value_type& my_value; + public: + destroyer( reference value ) : my_value(value) {} + destroyer( const destroyer& ) = delete; + destroyer& operator=( const destroyer& ) = delete; + ~destroyer() {my_value.~T();} + }; // class destroyer + + void copy_item( padded_page& dst, size_type dindex, const padded_page& src, size_type sindex, + item_constructor_type construct_item ) + { + auto& src_item = src[sindex]; + construct_item( &dst[dindex], static_cast<const void*>(&src_item) ); + } + + void assign_and_destroy_item( void* dst, padded_page& src, size_type index ) { + auto& from = src[index]; + destroyer d(from); + *static_cast<T*>(dst) = std::move(from); + } + + void spin_wait_until_my_turn( std::atomic<ticket_type>& counter, ticket_type k, queue_rep_type& rb ) const { + for (atomic_backoff b(true);; b.pause()) { + ticket_type c = counter; + if (c == k) return; + else if (c & 1) { + ++rb.n_invalid_entries; + throw_exception( exception_id::bad_last_alloc); + } + } + } + + std::atomic<padded_page*> head_page{}; + std::atomic<ticket_type> head_counter{}; + + std::atomic<padded_page*> tail_page{}; + std::atomic<ticket_type> tail_counter{}; + + spin_mutex page_mutex{}; +}; // class micro_queue + +#if _MSC_VER && !defined(__INTEL_COMPILER) +#pragma warning( pop ) +#endif // warning 4146 is back + +template <typename Container, typename T, typename Allocator> +class micro_queue_pop_finalizer { +public: + using padded_page = typename Container::padded_page; + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + + micro_queue_pop_finalizer( Container& queue, Allocator& alloc, ticket_type k, padded_page* p ) : + my_ticket_type(k), my_queue(queue), my_page(p), allocator(alloc) + {} + + micro_queue_pop_finalizer( const 
micro_queue_pop_finalizer& ) = delete; + micro_queue_pop_finalizer& operator=( const micro_queue_pop_finalizer& ) = delete; + + ~micro_queue_pop_finalizer() { + padded_page* p = my_page; + if( is_valid_page(p) ) { + spin_mutex::scoped_lock lock( my_queue.page_mutex ); + padded_page* q = p->next; + my_queue.head_page.store(q, std::memory_order_relaxed); + if( !is_valid_page(q) ) { + my_queue.tail_page.store(nullptr, std::memory_order_relaxed); + } + } + my_queue.head_counter.store(my_ticket_type, std::memory_order_relaxed); + if ( is_valid_page(p) ) { + allocator_traits_type::destroy(allocator, static_cast<padded_page*>(p)); + allocator_traits_type::deallocate(allocator, static_cast<padded_page*>(p), 1); + } + } +private: + ticket_type my_ticket_type; + Container& my_queue; + padded_page* my_page; + Allocator& allocator; +}; // class micro_queue_pop_finalizer + +#if _MSC_VER && !defined(__INTEL_COMPILER) +// structure was padded due to alignment specifier +#pragma warning( push ) +#pragma warning( disable: 4324 ) +#endif + +template <typename T, typename Allocator> +struct concurrent_queue_rep { + using self_type = concurrent_queue_rep<T, Allocator>; + using size_type = std::size_t; + using micro_queue_type = micro_queue<T, Allocator>; + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using padded_page = typename micro_queue_type::padded_page; + using page_allocator_type = typename micro_queue_type::page_allocator_type; + using item_constructor_type = typename micro_queue_type::item_constructor_type; +private: + using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<self_type>; + +public: + // must be power of 2 + static constexpr size_type n_queue = 8; + // Approximately n_queue/golden ratio + static constexpr size_type phi = 3; + static constexpr size_type item_size = micro_queue_type::item_size; + static constexpr size_type items_per_page = micro_queue_type::items_per_page; + + concurrent_queue_rep( queue_allocator_type& alloc ) : my_queue_allocator(alloc) + {} + + concurrent_queue_rep( const concurrent_queue_rep& ) = delete; + concurrent_queue_rep& operator=( const concurrent_queue_rep& ) = delete; + + void clear() { + page_allocator_type page_allocator(my_queue_allocator); + for (size_type i = 0; i < n_queue; ++i) { + padded_page* tail_page = array[i].get_tail_page(); + if( is_valid_page(tail_page) ) { + __TBB_ASSERT(array[i].get_head_page() == tail_page, "at most one page should remain" ); + page_allocator_traits::destroy(page_allocator, static_cast<padded_page*>(tail_page)); + page_allocator_traits::deallocate(page_allocator, static_cast<padded_page*>(tail_page), 1); + array[i].set_tail_page(nullptr); + } else { + __TBB_ASSERT(!is_valid_page(array[i].get_head_page()), "head page pointer corrupt?"); + } + } + } + + void assign( const concurrent_queue_rep& src, item_constructor_type construct_item ) { + head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + n_invalid_entries.store(src.n_invalid_entries.load(std::memory_order_relaxed), std::memory_order_relaxed); + + // copy or move micro_queues + size_type queue_idx = 0; + try_call( [&] { + for (; queue_idx < n_queue; ++queue_idx) { + array[queue_idx].assign(src.array[queue_idx], *this, construct_item); + } + 
}).on_exception( [&] { + for (size_type i = 0; i < queue_idx + 1; ++i) { + array[i].clear(*this); + } + head_counter.store(0, std::memory_order_relaxed); + tail_counter.store(0, std::memory_order_relaxed); + n_invalid_entries.store(0, std::memory_order_relaxed); + }); + + __TBB_ASSERT(head_counter.load(std::memory_order_relaxed) == src.head_counter.load(std::memory_order_relaxed) && + tail_counter.load(std::memory_order_relaxed) == src.tail_counter.load(std::memory_order_relaxed), + "the source concurrent queue should not be concurrently modified." ); + } + + bool empty() const { + ticket_type tc = tail_counter.load(std::memory_order_acquire); + ticket_type hc = head_counter.load(std::memory_order_relaxed); + // if tc!=r.tail_counter, the queue was not empty at some point between the two reads. + return tc == tail_counter.load(std::memory_order_relaxed) && + std::ptrdiff_t(tc - hc - n_invalid_entries.load(std::memory_order_relaxed)) <= 0; + } + + std::ptrdiff_t size() const { + __TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), NULL); + std::ptrdiff_t hc = head_counter.load(std::memory_order_acquire); + std::ptrdiff_t tc = tail_counter.load(std::memory_order_relaxed); + std::ptrdiff_t nie = n_invalid_entries.load(std::memory_order_relaxed); + + return tc - hc - nie; + } + + queue_allocator_type& get_allocator() { + return my_queue_allocator; + } + + friend class micro_queue<T, Allocator>; + + // Map ticket_type to an array index + static size_type index( ticket_type k ) { + return k * phi % n_queue; + } + + micro_queue_type& choose( ticket_type k ) { + // The formula here approximates LRU in a cache-oblivious way. + return array[index(k)]; + } + + alignas(max_nfs_size) micro_queue_type array[n_queue]; + + alignas(max_nfs_size) std::atomic<ticket_type> head_counter{}; + alignas(max_nfs_size) std::atomic<ticket_type> tail_counter{}; + alignas(max_nfs_size) std::atomic<size_type> n_invalid_entries{}; + queue_allocator_type& my_queue_allocator; +}; // class concurrent_queue_rep + +#if _MSC_VER && !defined(__INTEL_COMPILER) +#pragma warning( pop ) +#endif + +template <typename Value, typename Allocator> +class concurrent_queue_iterator_base { + using queue_rep_type = concurrent_queue_rep<Value, Allocator>; + using padded_page = typename queue_rep_type::padded_page; +protected: + concurrent_queue_iterator_base() = default; + + concurrent_queue_iterator_base( const concurrent_queue_iterator_base& other ) { + assign(other); + } + + concurrent_queue_iterator_base( queue_rep_type* queue_rep ) + : my_queue_rep(queue_rep), + my_head_counter(my_queue_rep->head_counter.load(std::memory_order_relaxed)) + { + for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { + my_array[i] = my_queue_rep->array[i].get_head_page(); + } + + if (!get_item(my_item, my_head_counter)) advance(); + } + + void assign( const concurrent_queue_iterator_base& other ) { + my_item = other.my_item; + my_queue_rep = other.my_queue_rep; + + if (my_queue_rep != nullptr) { + my_head_counter = other.my_head_counter; + + for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { + my_array[i] = other.my_array[i]; + } + } + } + + void advance() { + __TBB_ASSERT(my_item, "Attempt to increment iterator past end of the queue"); + std::size_t k = my_head_counter; +#if TBB_USE_ASSERT + Value* tmp; + get_item(tmp, k); + __TBB_ASSERT(my_item == tmp, nullptr); +#endif + std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); + if (i == my_queue_rep->items_per_page - 1) { + padded_page*& root = 
my_array[queue_rep_type::index(k)]; + root = root->next; + } + // Advance k + my_head_counter = ++k; + if (!get_item(my_item, k)) advance(); + } + + concurrent_queue_iterator_base& operator=( const concurrent_queue_iterator_base& other ) { + this->assign(other); + return *this; + } + + bool get_item( Value*& item, std::size_t k ) { + if (k == my_queue_rep->tail_counter.load(std::memory_order_relaxed)) { + item = nullptr; + return true; + } else { + padded_page* p = my_array[queue_rep_type::index(k)]; + __TBB_ASSERT(p, nullptr); + std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); + item = &(*p)[i]; + return (p->mask & uintptr_t(1) << i) != 0; + } + } + + Value* my_item{ nullptr }; + queue_rep_type* my_queue_rep{ nullptr }; + ticket_type my_head_counter{}; + padded_page* my_array[queue_rep_type::n_queue]; +}; // class concurrent_queue_iterator_base + +struct concurrent_queue_iterator_provider { + template <typename Iterator, typename Container> + static Iterator get( const Container& container ) { + return Iterator(container); + } +}; // struct concurrent_queue_iterator_provider + +template <typename Container, typename Value, typename Allocator> +class concurrent_queue_iterator : public concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator> { + using base_type = concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator>; +public: + using value_type = Value; + using pointer = value_type*; + using reference = value_type&; + using difference_type = std::ptrdiff_t; + using iterator_category = std::forward_iterator_tag; + + concurrent_queue_iterator() = default; + + /** If Value==Container::value_type, then this routine is the copy constructor. + If Value==const Container::value_type, then this routine is a conversion constructor. 
*/ + concurrent_queue_iterator( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) + : base_type(other) {} + +private: + concurrent_queue_iterator( const Container& container ) + : base_type(container.my_queue_representation) {} +public: + concurrent_queue_iterator& operator=( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) { + this->assign(other); + return *this; + } + + reference operator*() const { + return *static_cast<pointer>(this->my_item); + } + + pointer operator->() const { return &operator*(); } + + concurrent_queue_iterator& operator++() { + this->advance(); + return *this; + } + + concurrent_queue_iterator operator++(int) { + concurrent_queue_iterator tmp = *this; + ++*this; + return tmp; + } + + friend bool operator==( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { + return lhs.my_item == rhs.my_item; + } + + friend bool operator!=( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { + return lhs.my_item != rhs.my_item; + } +private: + friend struct concurrent_queue_iterator_provider; +}; // class concurrent_queue_iterator + +} // namespace d1 +} // namespace detail +} // tbb + +#endif // __TBB_detail__concurrent_queue_base_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h index c4d4c627e0..734e9ac3d5 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h @@ -1,1252 +1,1252 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__concurrent_skip_list_H -#define __TBB_detail__concurrent_skip_list_H - -#if !defined(__TBB_concurrent_map_H) && !defined(__TBB_concurrent_set_H) -#error Do not #include this internal file directly; use public TBB headers instead. 
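As the #error above says, this header is internal and is reached only through the public headers. A minimal, hedged sketch of the ordered containers built on the skip list defined below (assuming a standard oneTBB installation linked against tbb):

#include <oneapi/tbb/concurrent_set.h>
#include <oneapi/tbb/concurrent_map.h>
#include <iostream>

int main() {
    // Both containers are backed by the concurrent skip list in this header.
    tbb::concurrent_set<int> ordered_ints{3, 1, 2};
    tbb::concurrent_map<int, const char*> names;
    names.emplace(1, "one");

    for (int v : ordered_ints) std::cout << v << ' ';   // prints 1 2 3: iteration follows key order
    std::cout << '\n' << names.find(1)->second << '\n';
    return 0;
}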
-#endif - -#include "_config.h" -#include "_range_common.h" -#include "_allocator_traits.h" -#include "_template_helpers.h" -#include "_node_handle.h" -#include "_containers_helpers.h" -#include "_assert.h" -#include "_exception.h" -#include "../enumerable_thread_specific.h" -#include <utility> -#include <initializer_list> -#include <atomic> -#include <array> -#include <type_traits> -#include <random> // Need std::geometric_distribution -#include <algorithm> // Need std::equal and std::lexicographical_compare -#include <cstdint> -#if __TBB_CPP20_COMPARISONS_PRESENT -#include <compare> -#endif - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(push) -#pragma warning(disable: 4127) // warning C4127: conditional expression is constant -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Value, typename Allocator> -class skip_list_node { - using node_ptr = skip_list_node*; -public: - using value_type = Value; - using atomic_node_ptr = std::atomic<node_ptr>; - using size_type = std::size_t; - using container_allocator_type = Allocator; - - using reference = value_type&; - using const_reference = const value_type&; -private: - using allocator_traits = tbb::detail::allocator_traits<container_allocator_type>; - - // Allocator is the same as the container allocator=> allocates unitptr_t - // It is required to rebind it to value_type to get the correct pointer and const_pointer - using value_allocator_traits = typename allocator_traits::template rebind_traits<value_type>; -public: - using pointer = typename value_allocator_traits::pointer; - using const_pointer = typename value_allocator_traits::const_pointer; - - skip_list_node( size_type levels, container_allocator_type& alloc ) - : my_container_allocator(alloc), my_height(levels), my_index_number(0) - { - for (size_type l = 0; l < my_height; ++l) { - allocator_traits::construct(my_container_allocator, &get_atomic_next(l), nullptr); - } - } - - ~skip_list_node() { - for (size_type l = 0; l < my_height; ++l) { - allocator_traits::destroy(my_container_allocator, &get_atomic_next(l)); - } - } - - skip_list_node( const skip_list_node& ) = delete; - skip_list_node( skip_list_node&& ) = delete; - skip_list_node& operator=( const skip_list_node& ) = delete; - skip_list_node& operator=( skip_list_node&& ) = delete; - - pointer storage() { - return &my_value; - } - - reference value() { - return *storage(); - } - - node_ptr next( size_type level ) const { - node_ptr res = get_atomic_next(level).load(std::memory_order_acquire); - __TBB_ASSERT(res == nullptr || res->height() > level, "Broken internal structure"); - return res; - } - - atomic_node_ptr& atomic_next( size_type level ) { - atomic_node_ptr& res = get_atomic_next(level); -#if TBB_USE_DEBUG - node_ptr node = res.load(std::memory_order_acquire); - __TBB_ASSERT(node == nullptr || node->height() > level, "Broken internal structure"); -#endif - return res; - } - - void set_next( size_type level, node_ptr n ) { - __TBB_ASSERT(n == nullptr || n->height() > level, "Broken internal structure"); - get_atomic_next(level).store(n, std::memory_order_relaxed); - } - - size_type height() const { - return my_height; - } - - void set_index_number( size_type index_num ) { - my_index_number = index_num; - } - - size_type index_number() const { - return my_index_number; - } - -private: - atomic_node_ptr& get_atomic_next( size_type level ) { - atomic_node_ptr* arr = reinterpret_cast<atomic_node_ptr*>(this + 1); - return arr[level]; - } - - const atomic_node_ptr& 
get_atomic_next( size_type level ) const { - const atomic_node_ptr* arr = reinterpret_cast<const atomic_node_ptr*>(this + 1); - return arr[level]; - } - - container_allocator_type& my_container_allocator; - union { - value_type my_value; - }; - size_type my_height; - size_type my_index_number; -}; // class skip_list_node - -template <typename NodeType, typename ValueType> -class skip_list_iterator { - using node_type = NodeType; - using node_ptr = node_type*; -public: - using iterator_category = std::forward_iterator_tag; - using value_type = ValueType; - - using difference_type = std::ptrdiff_t; - using pointer = value_type*; - using reference = value_type&; - - skip_list_iterator() : skip_list_iterator(nullptr) {} - - skip_list_iterator( const skip_list_iterator<node_type, typename node_type::value_type>& other ) - : my_node_ptr(other.my_node_ptr) {} - - skip_list_iterator& operator=( const skip_list_iterator<node_type, typename node_type::value_type>& other ) { - my_node_ptr = other.my_node_ptr; - return *this; - } - - reference operator*() const { return my_node_ptr->value(); } - pointer operator->() const { return my_node_ptr->storage(); } - - skip_list_iterator& operator++() { - __TBB_ASSERT(my_node_ptr != nullptr, nullptr); - my_node_ptr = my_node_ptr->next(0); - return *this; - } - - skip_list_iterator operator++(int) { - skip_list_iterator tmp = *this; - ++*this; - return tmp; - } - -private: - skip_list_iterator(node_type* n) : my_node_ptr(n) {} - - node_ptr my_node_ptr; - - template <typename Traits> - friend class concurrent_skip_list; - - template <typename N, typename V> - friend class skip_list_iterator; - - friend class const_range; - friend class range; - - friend bool operator==( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { - return lhs.my_node_ptr == rhs.my_node_ptr; - } - - friend bool operator!=( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { - return lhs.my_node_ptr != rhs.my_node_ptr; - } -}; // class skip_list_iterator - -template <typename Traits> -class concurrent_skip_list { -protected: - using container_traits = Traits; - using self_type = concurrent_skip_list<container_traits>; - using allocator_type = typename container_traits::allocator_type; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - using key_compare = typename container_traits::compare_type; - using value_compare = typename container_traits::value_compare; - using key_type = typename container_traits::key_type; - using value_type = typename container_traits::value_type; - static_assert(std::is_same<value_type, typename allocator_type::value_type>::value, - "value_type of the container should be the same as its allocator"); - - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - static constexpr size_type max_level = container_traits::max_level; - - using node_allocator_type = typename allocator_traits_type::template rebind_alloc<std::uint8_t>; - using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; - - using list_node_type = skip_list_node<value_type, node_allocator_type>; - using node_type = node_handle<key_type, value_type, list_node_type, allocator_type>; - - using iterator = skip_list_iterator<list_node_type, value_type>; - using const_iterator = skip_list_iterator<list_node_type, const value_type>; - - using reference = value_type&; - using const_reference = const value_type&; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename 
allocator_traits_type::const_pointer; - - using random_level_generator_type = typename container_traits::random_level_generator_type; - - using node_ptr = list_node_type*; - - using array_type = std::array<node_ptr, max_level>; -private: - template <typename T> - using is_transparent = dependent_bool<comp_is_transparent<key_compare>, T>; -public: - static constexpr bool allow_multimapping = container_traits::allow_multimapping; - - concurrent_skip_list() : my_head_ptr(nullptr), my_size(0), my_max_height(0) {} - - explicit concurrent_skip_list( const key_compare& comp, const allocator_type& alloc = allocator_type() ) - : my_node_allocator(alloc), my_compare(comp), my_head_ptr(nullptr), my_size(0), my_max_height(0) {} - - explicit concurrent_skip_list( const allocator_type& alloc ) - : concurrent_skip_list(key_compare(), alloc) {} - - template<typename InputIterator> - concurrent_skip_list( InputIterator first, InputIterator last, const key_compare& comp = key_compare(), - const allocator_type& alloc = allocator_type() ) - : concurrent_skip_list(comp, alloc) - { - internal_copy(first, last); - } - - template <typename InputIterator> - concurrent_skip_list( InputIterator first, InputIterator last, const allocator_type& alloc ) - : concurrent_skip_list(first, last, key_compare(), alloc) {} - - concurrent_skip_list( std::initializer_list<value_type> init, const key_compare& comp = key_compare(), - const allocator_type& alloc = allocator_type() ) - : concurrent_skip_list(init.begin(), init.end(), comp, alloc) {} - - concurrent_skip_list( std::initializer_list<value_type> init, const allocator_type& alloc ) - : concurrent_skip_list(init, key_compare(), alloc) {} - - concurrent_skip_list( const concurrent_skip_list& other ) - : my_node_allocator(node_allocator_traits::select_on_container_copy_construction(other.get_allocator())), - my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), - my_size(0), my_max_height(0) - { - internal_copy(other); - __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); - } - - concurrent_skip_list( const concurrent_skip_list& other, const allocator_type& alloc ) - : my_node_allocator(alloc), my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), - my_size(0), my_max_height(0) - { - internal_copy(other); - __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); - } - - concurrent_skip_list( concurrent_skip_list&& other ) - : my_node_allocator(std::move(other.my_node_allocator)), my_compare(other.my_compare), - my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) // my_head_ptr would be stored in internal_move - { - internal_move(std::move(other)); - } - - concurrent_skip_list( concurrent_skip_list&& other, const allocator_type& alloc ) - : my_node_allocator(alloc), my_compare(other.my_compare), - my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) - { - using is_always_equal = typename allocator_traits_type::is_always_equal; - internal_move_construct_with_allocator(std::move(other), is_always_equal()); - } - - ~concurrent_skip_list() { - clear(); - node_ptr head = my_head_ptr.load(std::memory_order_relaxed); - if (head != nullptr) { - delete_node(head); - } - } - - concurrent_skip_list& operator=( const concurrent_skip_list& other ) { - if (this != &other) { - clear(); - copy_assign_allocators(my_node_allocator, other.my_node_allocator); - my_compare = other.my_compare; - my_rng = other.my_rng; - internal_copy(other); - } - return *this; - } - - concurrent_skip_list& 
operator=( concurrent_skip_list&& other ) { - if (this != &other) { - clear(); - my_compare = std::move(other.my_compare); - my_rng = std::move(other.my_rng); - - move_assign_allocators(my_node_allocator, other.my_node_allocator); - using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; - using is_always_equal = typename node_allocator_traits::is_always_equal; - internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); - } - return *this; - } - - concurrent_skip_list& operator=( std::initializer_list<value_type> il ) - { - clear(); - insert(il.begin(),il.end()); - return *this; - } - - std::pair<iterator, bool> insert( const value_type& value ) { - return internal_insert(value); - } - - std::pair<iterator, bool> insert( value_type&& value ) { - return internal_insert(std::move(value)); - } - - iterator insert( const_iterator, const_reference value ) { - // Ignore hint - return insert(value).first; - } - - iterator insert( const_iterator, value_type&& value ) { - // Ignore hint - return insert(std::move(value)).first; - } - - template<typename InputIterator> - void insert( InputIterator first, InputIterator last ) { - while (first != last) { - insert(*first); - ++first; - } - } - - void insert( std::initializer_list<value_type> init ) { - insert(init.begin(), init.end()); - } - - std::pair<iterator, bool> insert( node_type&& nh ) { - if (!nh.empty()) { - auto insert_node = node_handle_accessor::get_node_ptr(nh); - std::pair<iterator, bool> insert_result = internal_insert_node(insert_node); - if (insert_result.second) { - node_handle_accessor::deactivate(nh); - } - return insert_result; - } - return std::pair<iterator, bool>(end(), false); - } - - iterator insert( const_iterator, node_type&& nh ) { - // Ignore hint - return insert(std::move(nh)).first; - } - - template<typename... Args> - std::pair<iterator, bool> emplace( Args&&... args ) { - return internal_insert(std::forward<Args>(args)...); - } - - template<typename... Args> - iterator emplace_hint( const_iterator, Args&&... args ) { - // Ignore hint - return emplace(std::forward<Args>(args)...).first; - } - - iterator unsafe_erase( iterator pos ) { - std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); - if (extract_result.first) { // node was extracted - delete_value_node(extract_result.first); - return extract_result.second; - } - return end(); - } - - iterator unsafe_erase( const_iterator pos ) { - return unsafe_erase(get_iterator(pos)); - } - - iterator unsafe_erase( const_iterator first, const_iterator last ) { - while (first != last) { - // Unsafe erase returns the iterator which follows the erased one - first = unsafe_erase(first); - } - return get_iterator(first); - } - - size_type unsafe_erase( const key_type& key ) { - return internal_erase(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value - && !std::is_convertible<K, const_iterator>::value - && !std::is_convertible<K, iterator>::value, - size_type>::type unsafe_erase( const K& key ) - { - return internal_erase(key); - } - - node_type unsafe_extract( const_iterator pos ) { - std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); - return extract_result.first ? 
node_handle_accessor::construct<node_type>(extract_result.first) : node_type(); - } - - node_type unsafe_extract( iterator pos ) { - return unsafe_extract(const_iterator(pos)); - } - - node_type unsafe_extract( const key_type& key ) { - return unsafe_extract(find(key)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value - && !std::is_convertible<K, const_iterator>::value - && !std::is_convertible<K, iterator>::value, - node_type>::type unsafe_extract( const K& key ) - { - return unsafe_extract(find(key)); - } - - iterator lower_bound( const key_type& key ) { - return iterator(internal_get_bound(key, my_compare)); - } - - const_iterator lower_bound( const key_type& key ) const { - return const_iterator(internal_get_bound(key, my_compare)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, iterator>::type lower_bound( const K& key ) { - return iterator(internal_get_bound(key, my_compare)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, const_iterator>::type lower_bound( const K& key ) const { - return const_iterator(internal_get_bound(key, my_compare)); - } - - iterator upper_bound( const key_type& key ) { - return iterator(internal_get_bound(key, not_greater_compare(my_compare))); - } - - const_iterator upper_bound( const key_type& key ) const { - return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, iterator>::type upper_bound( const K& key ) { - return iterator(internal_get_bound(key, not_greater_compare(my_compare))); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, const_iterator>::type upper_bound( const K& key ) const { - return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); - } - - iterator find( const key_type& key ) { - return iterator(internal_find(key)); - } - - const_iterator find( const key_type& key ) const { - return const_iterator(internal_find(key)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { - return iterator(internal_find(key)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { - return const_iterator(internal_find(key)); - } - - size_type count( const key_type& key ) const { - return internal_count(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { - return internal_count(key); - } - - bool contains( const key_type& key ) const { - return find(key) != end(); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { - return find(key) != end(); - } - - void clear() noexcept { - // clear is not thread safe - load can be relaxed - node_ptr head = my_head_ptr.load(std::memory_order_relaxed); - - if (head == nullptr) return; // Head is not allocated => container is empty - - node_ptr current = head->next(0); - - // Delete all value nodes in the container - while (current) { - node_ptr next = current->next(0); - delete_value_node(current); - current = next; - } - - for (size_type level = 0; level < head->height(); ++level) { - head->set_next(level, nullptr); - } - - my_size.store(0, std::memory_order_relaxed); - my_max_height.store(0, std::memory_order_relaxed); - } - - iterator begin() { - return 
iterator(internal_begin()); - } - - const_iterator begin() const { - return const_iterator(internal_begin()); - } - - const_iterator cbegin() const { - return const_iterator(internal_begin()); - } - - iterator end() { - return iterator(nullptr); - } - - const_iterator end() const { - return const_iterator(nullptr); - } - - const_iterator cend() const { - return const_iterator(nullptr); - } - - size_type size() const { - return my_size.load(std::memory_order_relaxed); - } - - size_type max_size() const { - return node_allocator_traits::max_size(my_node_allocator); - } - - __TBB_nodiscard bool empty() const { - return 0 == size(); - } - - allocator_type get_allocator() const { - return my_node_allocator; - } - - void swap(concurrent_skip_list& other) { - if (this != &other) { - using pocs_type = typename node_allocator_traits::propagate_on_container_swap; - using is_always_equal = typename node_allocator_traits::is_always_equal; - internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); - } - } - - std::pair<iterator, iterator> equal_range(const key_type& key) { - return internal_equal_range(key); - } - - std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { - return internal_equal_range(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { - return internal_equal_range(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { - return internal_equal_range(key); - } - - key_compare key_comp() const { return my_compare; } - - value_compare value_comp() const { return container_traits::value_comp(my_compare); } - - class const_range_type { - public: - using size_type = typename concurrent_skip_list::size_type; - using value_type = typename concurrent_skip_list::value_type; - using iterator = typename concurrent_skip_list::const_iterator; - - bool empty() const { - return my_begin.my_node_ptr->next(0) == my_end.my_node_ptr; - } - - bool is_divisible() const { - return my_level != 0 ? 
my_begin.my_node_ptr->next(my_level - 1) != my_end.my_node_ptr : false; - } - - size_type size() const { return std::distance(my_begin, my_end); } - - const_range_type( const_range_type& r, split) - : my_end(r.my_end) { - my_begin = iterator(r.my_begin.my_node_ptr->next(r.my_level - 1)); - my_level = my_begin.my_node_ptr->height(); - r.my_end = my_begin; - } - - const_range_type( const concurrent_skip_list& l) - : my_end(l.end()), my_begin(l.begin()), my_level(my_begin.my_node_ptr->height() ) {} - - iterator begin() const { return my_begin; } - iterator end() const { return my_end; } - size_type grainsize() const { return 1; } - - private: - const_iterator my_end; - const_iterator my_begin; - size_type my_level; - }; // class const_range_type - - class range_type : public const_range_type { - public: - using iterator = typename concurrent_skip_list::iterator; - - range_type(range_type& r, split) : const_range_type(r, split()) {} - range_type(const concurrent_skip_list& l) : const_range_type(l) {} - - iterator begin() const { - node_ptr node = const_range_type::begin().my_node_ptr; - return iterator(node); - } - - iterator end() const { - node_ptr node = const_range_type::end().my_node_ptr; - return iterator(node); - } - }; // class range_type - - range_type range() { return range_type(*this); } - const_range_type range() const { return const_range_type(*this); } - -private: - node_ptr internal_begin() const { - node_ptr head = get_head(); - return head == nullptr ? head : head->next(0); - } - - void internal_move(concurrent_skip_list&& other) { - my_head_ptr.store(other.my_head_ptr.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_head_ptr.store(nullptr, std::memory_order_relaxed); - - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_size.store(0, std::memory_order_relaxed); - - my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_max_height.store(0, std::memory_order_relaxed); - } - - void internal_move_construct_with_allocator(concurrent_skip_list&& other, - /*is_always_equal = */std::true_type) { - internal_move(std::move(other)); - } - - void internal_move_construct_with_allocator(concurrent_skip_list&& other, - /*is_always_equal = */std::false_type) { - if (my_node_allocator == other.get_allocator()) { - internal_move(std::move(other)); - } else { - my_size.store(0, std::memory_order_relaxed); - my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); - internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); - } - } - - static const key_type& get_key( node_ptr n ) { - __TBB_ASSERT(n, nullptr); - return container_traits::get_key(static_cast<node_ptr>(n)->value()); - } - - template <typename K> - bool found( node_ptr node, const K& key ) const { - return node != nullptr && !my_compare(key, get_key(node)); - } - - template <typename K> - node_ptr internal_find(const K& key) const { - return allow_multimapping ? 
internal_find_multi(key) : internal_find_unique(key); - } - - template <typename K> - node_ptr internal_find_multi( const K& key ) const { - node_ptr prev = get_head(); - if (prev == nullptr) return nullptr; // If the head node is not allocated - exit - - node_ptr curr = nullptr; - node_ptr old_curr = curr; - - for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { - curr = internal_find_position(h - 1, prev, key, my_compare); - - if (curr != old_curr && found(curr, key)) { - return curr; - } - old_curr = curr; - } - return nullptr; - } - - template <typename K> - node_ptr internal_find_unique( const K& key ) const { - const_iterator it = lower_bound(key); - return (it == end() || my_compare(key, container_traits::get_key(*it))) ? nullptr : it.my_node_ptr; - } - - template <typename K> - size_type internal_count( const K& key ) const { - if (allow_multimapping) { - // TODO: reimplement without double traversal - std::pair<const_iterator, const_iterator> r = equal_range(key); - return std::distance(r.first, r.second); - } - return size_type(contains(key) ? 1 : 0); - } - - template <typename K> - std::pair<iterator, iterator> internal_equal_range(const K& key) const { - iterator lb = get_iterator(lower_bound(key)); - auto result = std::make_pair(lb, lb); - - // If the lower bound points to the node with the requested key - if (found(lb.my_node_ptr, key)) { - - if (!allow_multimapping) { - // For unique containers - move the second iterator forward and exit - ++result.second; - } else { - // For multi containers - find the upper bound starting from the lower bound - node_ptr prev = lb.my_node_ptr; - node_ptr curr = nullptr; - not_greater_compare cmp(my_compare); - - // Start from the lower bound of the range - for (size_type h = prev->height(); h > 0; --h) { - curr = prev->next(h - 1); - while (curr && cmp(get_key(curr), key)) { - prev = curr; - // If the height of the next node is greater than the current one - jump to its height - if (h < curr->height()) { - h = curr->height(); - } - curr = prev->next(h - 1); - } - } - result.second = iterator(curr); - } - } - - return result; - } - - // Finds position on the level using comparator cmp starting from the node prev - template <typename K, typename Comparator> - node_ptr internal_find_position( size_type level, node_ptr& prev, const K& key, - const Comparator& cmp ) const { - __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); - node_ptr curr = prev->next(level); - - while (curr && cmp(get_key(curr), key)) { - prev = curr; - __TBB_ASSERT(level < prev->height(), nullptr); - curr = prev->next(level); - } - - return curr; - } - - // The same as previous overload, but allows index_number comparison - template <typename Comparator> - node_ptr internal_find_position( size_type level, node_ptr& prev, node_ptr node, - const Comparator& cmp ) const { - __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); - node_ptr curr = prev->next(level); - - while (curr && cmp(get_key(curr), get_key(node))) { - if (allow_multimapping && cmp(get_key(node), get_key(curr)) && curr->index_number() > node->index_number()) { - break; - } - - prev = curr; - __TBB_ASSERT(level < prev->height(), nullptr); - curr = prev->next(level); - } - return curr; - } - - template <typename Comparator> - void fill_prev_curr_arrays(array_type& prev_nodes, array_type& curr_nodes, node_ptr node, const key_type& key, - const Comparator& cmp, node_ptr head ) { - - size_type curr_max_height = 
my_max_height.load(std::memory_order_acquire); - size_type node_height = node->height(); - if (curr_max_height < node_height) { - std::fill(prev_nodes.begin() + curr_max_height, prev_nodes.begin() + node_height, head); - std::fill(curr_nodes.begin() + curr_max_height, curr_nodes.begin() + node_height, nullptr); - } - - node_ptr prev = head; - for (size_type level = curr_max_height; level > 0; --level) { - node_ptr curr = internal_find_position(level - 1, prev, key, cmp); - prev_nodes[level - 1] = prev; - curr_nodes[level - 1] = curr; - } - } - - void fill_prev_array_for_existing_node( array_type& prev_nodes, node_ptr node ) { - node_ptr head = create_head_if_necessary(); - prev_nodes.fill(head); - - node_ptr prev = head; - for (size_type level = node->height(); level > 0; --level) { - while (prev->next(level - 1) != node) { - prev = prev->next(level - 1); - } - prev_nodes[level - 1] = prev; - } - } - - struct not_greater_compare { - const key_compare& my_less_compare; - - not_greater_compare( const key_compare& less_compare ) : my_less_compare(less_compare) {} - - template <typename K1, typename K2> - bool operator()( const K1& first, const K2& second ) const { - return !my_less_compare(second, first); - } - }; - - not_greater_compare select_comparator( /*allow_multimapping = */ std::true_type ) { - return not_greater_compare(my_compare); - } - - key_compare select_comparator( /*allow_multimapping = */ std::false_type ) { - return my_compare; - } - - template<typename... Args> - std::pair<iterator, bool> internal_insert( Args&&... args ) { - node_ptr new_node = create_value_node(std::forward<Args>(args)...); - std::pair<iterator, bool> insert_result = internal_insert_node(new_node); - if (!insert_result.second) { - delete_value_node(new_node); - } - return insert_result; - } - - std::pair<iterator, bool> internal_insert_node( node_ptr new_node ) { - array_type prev_nodes; - array_type curr_nodes; - size_type new_height = new_node->height(); - auto compare = select_comparator(std::integral_constant<bool, allow_multimapping>{}); - - node_ptr head_node = create_head_if_necessary(); - - for (;;) { - fill_prev_curr_arrays(prev_nodes, curr_nodes, new_node, get_key(new_node), compare, head_node); - - node_ptr prev = prev_nodes[0]; - node_ptr next = curr_nodes[0]; - - if (allow_multimapping) { - new_node->set_index_number(prev->index_number() + 1); - } else { - if (found(next, get_key(new_node))) { - return std::pair<iterator, bool>(iterator(next), false); - } - } - - new_node->set_next(0, next); - if (!prev->atomic_next(0).compare_exchange_strong(next, new_node)) { - continue; - } - - // If the node was successfully linked on the first level - it will be linked on other levels - // Insertion cannot fail starting from this point - - // If the height of inserted node is greater than maximum - increase maximum - size_type max_height = my_max_height.load(std::memory_order_acquire); - for (;;) { - if (new_height <= max_height || my_max_height.compare_exchange_strong(max_height, new_height)) { - // If the maximum was successfully updated by current thread - // or by an other thread for the value, greater or equal to new_height - break; - } - } - - for (std::size_t level = 1; level < new_height; ++level) { - // Link the node on upper levels - for (;;) { - prev = prev_nodes[level]; - next = static_cast<node_ptr>(curr_nodes[level]); - - new_node->set_next(level, next); - __TBB_ASSERT(new_node->height() > level, "Internal structure break"); - if (prev->atomic_next(level).compare_exchange_strong(next, 
new_node)) { - break; - } - - for (size_type lev = level; lev != new_height; ++lev ) { - curr_nodes[lev] = internal_find_position(lev, prev_nodes[lev], new_node, compare); - } - } - } - ++my_size; - return std::pair<iterator, bool>(iterator(new_node), true); - } - } - - template <typename K, typename Comparator> - node_ptr internal_get_bound( const K& key, const Comparator& cmp ) const { - node_ptr prev = get_head(); - if (prev == nullptr) return nullptr; // If the head node is not allocated - exit - - node_ptr curr = nullptr; - - for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { - curr = internal_find_position(h - 1, prev, key, cmp); - } - - return curr; - } - - template <typename K> - size_type internal_erase( const K& key ) { - auto eq = equal_range(key); - size_type old_size = size(); - unsafe_erase(eq.first, eq.second); - return old_size - size(); - } - - // Returns node_ptr to the extracted node and node_ptr to the next node after the extracted - std::pair<node_ptr, node_ptr> internal_extract( const_iterator it ) { - std::pair<node_ptr, node_ptr> result(nullptr, nullptr); - if ( it != end() ) { - array_type prev_nodes; - - node_ptr erase_node = it.my_node_ptr; - node_ptr next_node = erase_node->next(0); - fill_prev_array_for_existing_node(prev_nodes, erase_node); - - for (size_type level = 0; level < erase_node->height(); ++level) { - prev_nodes[level]->set_next(level, erase_node->next(level)); - erase_node->set_next(level, nullptr); - } - my_size.fetch_sub(1, std::memory_order_relaxed); - - result.first = erase_node; - result.second = next_node; - } - return result; - } - -protected: - template<typename SourceType> - void internal_merge( SourceType&& source ) { - using source_type = typename std::decay<SourceType>::type; - using source_iterator = typename source_type::iterator; - static_assert((std::is_same<node_type, typename source_type::node_type>::value), "Incompatible containers cannot be merged"); - - for (source_iterator it = source.begin(); it != source.end();) { - source_iterator where = it++; - if (allow_multimapping || !contains(container_traits::get_key(*where))) { - node_type handle = source.unsafe_extract(where); - __TBB_ASSERT(!handle.empty(), "Extracted handle in merge is empty"); - - if (!insert(std::move(handle)).second) { - //If the insertion fails - return the node into source - source.insert(std::move(handle)); - } - __TBB_ASSERT(handle.empty(), "Node handle should be empty after the insertion"); - } - } - } - -private: - void internal_copy( const concurrent_skip_list& other ) { - internal_copy(other.begin(), other.end()); - } - - template<typename Iterator> - void internal_copy( Iterator first, Iterator last ) { - try_call([&] { - for (auto it = first; it != last; ++it) { - insert(*it); - } - }).on_exception([&] { - clear(); - node_ptr head = my_head_ptr.load(std::memory_order_relaxed); - if (head != nullptr) { - delete_node(head); - } - }); - } - - static size_type calc_node_size( size_type height ) { - static_assert(alignof(list_node_type) >= alignof(typename list_node_type::atomic_node_ptr), "Incorrect alignment"); - return sizeof(list_node_type) + height * sizeof(typename list_node_type::atomic_node_ptr); - } - - node_ptr create_node( size_type height ) { - size_type sz = calc_node_size(height); - node_ptr node = reinterpret_cast<node_ptr>(node_allocator_traits::allocate(my_node_allocator, sz)); - node_allocator_traits::construct(my_node_allocator, node, height, my_node_allocator); - return node; - } - - template <typename... 
Args> - node_ptr create_value_node( Args&&... args ) { - node_ptr node = create_node(my_rng()); - - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard([&] { - delete_node(node); - }); - - // Construct the value inside the node - node_allocator_traits::construct(my_node_allocator, node->storage(), std::forward<Args>(args)...); - value_guard.dismiss(); - return node; - } - - node_ptr create_head_node() { - return create_node(max_level); - } - - void delete_node( node_ptr node ) { - size_type sz = calc_node_size(node->height()); - - // Destroy the node - node_allocator_traits::destroy(my_node_allocator, node); - // Deallocate the node - node_allocator_traits::deallocate(my_node_allocator, reinterpret_cast<std::uint8_t*>(node), sz); - } - - void delete_value_node( node_ptr node ) { - // Destroy the value inside the node - node_allocator_traits::destroy(my_node_allocator, node->storage()); - delete_node(node); - } - - node_ptr get_head() const { - return my_head_ptr.load(std::memory_order_acquire); - } - - node_ptr create_head_if_necessary() { - node_ptr current_head = get_head(); - if (current_head == nullptr) { - // Head node was not created - create it - node_ptr new_head = create_head_node(); - if (my_head_ptr.compare_exchange_strong(current_head, new_head)) { - current_head = new_head; - } else { - // If an other thread has already created the head node - destroy new_head - // current_head now points to the actual head node - delete_node(new_head); - } - } - __TBB_ASSERT(my_head_ptr.load(std::memory_order_relaxed) != nullptr, nullptr); - __TBB_ASSERT(current_head != nullptr, nullptr); - return current_head; - } - - static iterator get_iterator( const_iterator it ) { - return iterator(it.my_node_ptr); - } - - void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::true_type ) { - internal_move(std::move(other)); - } - - void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::false_type ) { - if (my_node_allocator == other.my_node_allocator) { - internal_move(std::move(other)); - } else { - internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); - } - } - - void internal_swap_fields( concurrent_skip_list& other ) { - using std::swap; - swap_allocators(my_node_allocator, other.my_node_allocator); - swap(my_compare, other.my_compare); - swap(my_rng, other.my_rng); - - swap_atomics_relaxed(my_head_ptr, other.my_head_ptr); - swap_atomics_relaxed(my_size, other.my_size); - swap_atomics_relaxed(my_max_height, other.my_max_height); - } - - void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::true_type ) { - internal_swap_fields(other); - } - - void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::false_type ) { - __TBB_ASSERT(my_node_allocator == other.my_node_allocator, "Swapping with unequal allocators is not allowed"); - internal_swap_fields(other); - } - - node_allocator_type my_node_allocator; - key_compare my_compare; - random_level_generator_type my_rng; - std::atomic<list_node_type*> my_head_ptr; - std::atomic<size_type> my_size; - std::atomic<size_type> my_max_height; - - template<typename OtherTraits> - friend class concurrent_skip_list; -}; // class concurrent_skip_list - -template <typename Traits> -bool operator==( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - if (lhs.size() != rhs.size()) return false; 
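The insertion path shown above links a new node into level 0 with a single compare-and-swap and, on contention, re-searches the position and retries before linking the upper levels. Below is a minimal sketch of just that level-0 linking step, reduced to a plain singly linked list; demo_node and link_after are hypothetical names used only for this illustration and are not part of the patch.

#include <atomic>

struct demo_node {
    int value;
    std::atomic<demo_node*> next{nullptr};
};

// Links `n` right after `prev`. If another thread linked a node in the
// meantime, the CAS fails, `expected` is refreshed, and the attempt repeats -
// the same retry shape internal_insert_node uses at level 0 (the skip list
// additionally re-searches the insertion position before retrying).
void link_after(demo_node* prev, demo_node* n) {
    demo_node* expected = prev->next.load(std::memory_order_acquire);
    do {
        n->next.store(expected, std::memory_order_relaxed);
    } while (!prev->next.compare_exchange_strong(expected, n));
}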
-#if _MSC_VER - // Passing "unchecked" iterators to std::equal with 3 parameters - // causes compiler warnings. - // The workaround is to use overload with 4 parameters, which is - // available since C++14 - minimally supported version on MSVC - return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); -#else - return std::equal(lhs.begin(), lhs.end(), rhs.begin()); -#endif -} - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template <typename Traits> -bool operator!=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return !(lhs == rhs); -} -#endif - -#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT -template <typename Traits> -tbb::detail::synthesized_three_way_result<typename Traits::value_type> -operator<=>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), - rhs.begin(), rhs.end(), - tbb::detail::synthesized_three_way_comparator{}); -} -#else -template <typename Traits> -bool operator<( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); -} - -template <typename Traits> -bool operator>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return rhs < lhs; -} - -template <typename Traits> -bool operator<=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return !(rhs < lhs); -} - -template <typename Traits> -bool operator>=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { - return !(lhs < rhs); -} -#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT - -// Generates a number from the interval [0, MaxLevel). -template <std::size_t MaxLevel> -class concurrent_geometric_level_generator { -public: - static constexpr std::size_t max_level = MaxLevel; - // TODO: modify the algorithm to accept other values of max_level - static_assert(max_level == 32, "Incompatible max_level for rng"); - - concurrent_geometric_level_generator() : engines(std::minstd_rand::result_type(time(nullptr))) {} - - std::size_t operator()() { - // +1 is required to pass at least 1 into log2 (log2(0) is undefined) - // -1 is required to have an ability to return 0 from the generator (max_level - log2(2^31) - 1) - std::size_t result = max_level - std::size_t(tbb::detail::log2(engines.local()() + 1)) - 1; - __TBB_ASSERT(result <= max_level, nullptr); - return result; - } - -private: - tbb::enumerable_thread_specific<std::minstd_rand> engines; -}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(pop) // warning 4127 is back -#endif - -#endif // __TBB_detail__concurrent_skip_list_H +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__concurrent_skip_list_H +#define __TBB_detail__concurrent_skip_list_H + +#if !defined(__TBB_concurrent_map_H) && !defined(__TBB_concurrent_set_H) +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_config.h" +#include "_range_common.h" +#include "_allocator_traits.h" +#include "_template_helpers.h" +#include "_node_handle.h" +#include "_containers_helpers.h" +#include "_assert.h" +#include "_exception.h" +#include "../enumerable_thread_specific.h" +#include <utility> +#include <initializer_list> +#include <atomic> +#include <array> +#include <type_traits> +#include <random> // Need std::geometric_distribution +#include <algorithm> // Need std::equal and std::lexicographical_compare +#include <cstdint> +#if __TBB_CPP20_COMPARISONS_PRESENT +#include <compare> +#endif + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Value, typename Allocator> +class skip_list_node { + using node_ptr = skip_list_node*; +public: + using value_type = Value; + using atomic_node_ptr = std::atomic<node_ptr>; + using size_type = std::size_t; + using container_allocator_type = Allocator; + + using reference = value_type&; + using const_reference = const value_type&; +private: + using allocator_traits = tbb::detail::allocator_traits<container_allocator_type>; + + // Allocator is the same as the container allocator=> allocates unitptr_t + // It is required to rebind it to value_type to get the correct pointer and const_pointer + using value_allocator_traits = typename allocator_traits::template rebind_traits<value_type>; +public: + using pointer = typename value_allocator_traits::pointer; + using const_pointer = typename value_allocator_traits::const_pointer; + + skip_list_node( size_type levels, container_allocator_type& alloc ) + : my_container_allocator(alloc), my_height(levels), my_index_number(0) + { + for (size_type l = 0; l < my_height; ++l) { + allocator_traits::construct(my_container_allocator, &get_atomic_next(l), nullptr); + } + } + + ~skip_list_node() { + for (size_type l = 0; l < my_height; ++l) { + allocator_traits::destroy(my_container_allocator, &get_atomic_next(l)); + } + } + + skip_list_node( const skip_list_node& ) = delete; + skip_list_node( skip_list_node&& ) = delete; + skip_list_node& operator=( const skip_list_node& ) = delete; + skip_list_node& operator=( skip_list_node&& ) = delete; + + pointer storage() { + return &my_value; + } + + reference value() { + return *storage(); + } + + node_ptr next( size_type level ) const { + node_ptr res = get_atomic_next(level).load(std::memory_order_acquire); + __TBB_ASSERT(res == nullptr || res->height() > level, "Broken internal structure"); + return res; + } + + atomic_node_ptr& atomic_next( size_type level ) { + atomic_node_ptr& res = get_atomic_next(level); +#if TBB_USE_DEBUG + node_ptr node = res.load(std::memory_order_acquire); + __TBB_ASSERT(node == nullptr || node->height() > level, "Broken internal structure"); +#endif + return res; + } + + void set_next( size_type level, node_ptr n ) { + __TBB_ASSERT(n == nullptr || n->height() > level, "Broken internal structure"); + get_atomic_next(level).store(n, std::memory_order_relaxed); + } + + size_type height() const { + return my_height; + } + + void set_index_number( size_type index_num ) { + my_index_number = index_num; + } + 
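skip_list_node reads its per-level links with memory_order_acquire and relies on the inserting thread publishing a node only after its value is fully constructed (via the compare-and-swap done through atomic_next). A minimal sketch of that publish/acquire pairing, using hypothetical names (payload, g_slot) that are not part of the patch:

#include <atomic>
#include <cassert>

struct payload { int value = 42; };

std::atomic<payload*> g_slot{nullptr};

void publisher() {
    payload* p = new payload{};                   // fully construct first
    payload* expected = nullptr;
    g_slot.compare_exchange_strong(expected, p);  // then publish (seq_cst, i.e. at least release)
}

void reader() {
    if (payload* p = g_slot.load(std::memory_order_acquire)) {
        assert(p->value == 42);                   // the constructed value is guaranteed visible
    }
}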
+ size_type index_number() const { + return my_index_number; + } + +private: + atomic_node_ptr& get_atomic_next( size_type level ) { + atomic_node_ptr* arr = reinterpret_cast<atomic_node_ptr*>(this + 1); + return arr[level]; + } + + const atomic_node_ptr& get_atomic_next( size_type level ) const { + const atomic_node_ptr* arr = reinterpret_cast<const atomic_node_ptr*>(this + 1); + return arr[level]; + } + + container_allocator_type& my_container_allocator; + union { + value_type my_value; + }; + size_type my_height; + size_type my_index_number; +}; // class skip_list_node + +template <typename NodeType, typename ValueType> +class skip_list_iterator { + using node_type = NodeType; + using node_ptr = node_type*; +public: + using iterator_category = std::forward_iterator_tag; + using value_type = ValueType; + + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + skip_list_iterator() : skip_list_iterator(nullptr) {} + + skip_list_iterator( const skip_list_iterator<node_type, typename node_type::value_type>& other ) + : my_node_ptr(other.my_node_ptr) {} + + skip_list_iterator& operator=( const skip_list_iterator<node_type, typename node_type::value_type>& other ) { + my_node_ptr = other.my_node_ptr; + return *this; + } + + reference operator*() const { return my_node_ptr->value(); } + pointer operator->() const { return my_node_ptr->storage(); } + + skip_list_iterator& operator++() { + __TBB_ASSERT(my_node_ptr != nullptr, nullptr); + my_node_ptr = my_node_ptr->next(0); + return *this; + } + + skip_list_iterator operator++(int) { + skip_list_iterator tmp = *this; + ++*this; + return tmp; + } + +private: + skip_list_iterator(node_type* n) : my_node_ptr(n) {} + + node_ptr my_node_ptr; + + template <typename Traits> + friend class concurrent_skip_list; + + template <typename N, typename V> + friend class skip_list_iterator; + + friend class const_range; + friend class range; + + friend bool operator==( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { + return lhs.my_node_ptr == rhs.my_node_ptr; + } + + friend bool operator!=( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { + return lhs.my_node_ptr != rhs.my_node_ptr; + } +}; // class skip_list_iterator + +template <typename Traits> +class concurrent_skip_list { +protected: + using container_traits = Traits; + using self_type = concurrent_skip_list<container_traits>; + using allocator_type = typename container_traits::allocator_type; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using key_compare = typename container_traits::compare_type; + using value_compare = typename container_traits::value_compare; + using key_type = typename container_traits::key_type; + using value_type = typename container_traits::value_type; + static_assert(std::is_same<value_type, typename allocator_type::value_type>::value, + "value_type of the container should be the same as its allocator"); + + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + static constexpr size_type max_level = container_traits::max_level; + + using node_allocator_type = typename allocator_traits_type::template rebind_alloc<std::uint8_t>; + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + + using list_node_type = skip_list_node<value_type, node_allocator_type>; + using node_type = node_handle<key_type, value_type, list_node_type, allocator_type>; + + using iterator = skip_list_iterator<list_node_type, value_type>; + 
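get_atomic_next() above treats the bytes immediately past the node object as the array of per-level links, and calc_node_size() later in the header sizes the allocation as sizeof(node) plus height atomic pointers. A compressed sketch of that layout, assuming pointer-compatible alignment (which the header itself static_asserts); tower_node and make_tower are illustrative names only, and a real implementation would also destroy the atomics and free the block, as delete_node does.

#include <atomic>
#include <cstddef>
#include <new>

struct tower_node {
    std::size_t height;

    std::atomic<tower_node*>* links() {
        // The per-level links live immediately after the object in the same allocation.
        return reinterpret_cast<std::atomic<tower_node*>*>(this + 1);
    }
};

tower_node* make_tower(std::size_t height) {
    // One raw block: the node header followed by `height` atomic pointers.
    std::size_t bytes = sizeof(tower_node) + height * sizeof(std::atomic<tower_node*>);
    void* raw = ::operator new(bytes);
    tower_node* node = new (raw) tower_node{height};
    for (std::size_t level = 0; level < height; ++level) {
        new (&node->links()[level]) std::atomic<tower_node*>(nullptr);
    }
    return node;
}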
using const_iterator = skip_list_iterator<list_node_type, const value_type>; + + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using random_level_generator_type = typename container_traits::random_level_generator_type; + + using node_ptr = list_node_type*; + + using array_type = std::array<node_ptr, max_level>; +private: + template <typename T> + using is_transparent = dependent_bool<comp_is_transparent<key_compare>, T>; +public: + static constexpr bool allow_multimapping = container_traits::allow_multimapping; + + concurrent_skip_list() : my_head_ptr(nullptr), my_size(0), my_max_height(0) {} + + explicit concurrent_skip_list( const key_compare& comp, const allocator_type& alloc = allocator_type() ) + : my_node_allocator(alloc), my_compare(comp), my_head_ptr(nullptr), my_size(0), my_max_height(0) {} + + explicit concurrent_skip_list( const allocator_type& alloc ) + : concurrent_skip_list(key_compare(), alloc) {} + + template<typename InputIterator> + concurrent_skip_list( InputIterator first, InputIterator last, const key_compare& comp = key_compare(), + const allocator_type& alloc = allocator_type() ) + : concurrent_skip_list(comp, alloc) + { + internal_copy(first, last); + } + + template <typename InputIterator> + concurrent_skip_list( InputIterator first, InputIterator last, const allocator_type& alloc ) + : concurrent_skip_list(first, last, key_compare(), alloc) {} + + concurrent_skip_list( std::initializer_list<value_type> init, const key_compare& comp = key_compare(), + const allocator_type& alloc = allocator_type() ) + : concurrent_skip_list(init.begin(), init.end(), comp, alloc) {} + + concurrent_skip_list( std::initializer_list<value_type> init, const allocator_type& alloc ) + : concurrent_skip_list(init, key_compare(), alloc) {} + + concurrent_skip_list( const concurrent_skip_list& other ) + : my_node_allocator(node_allocator_traits::select_on_container_copy_construction(other.get_allocator())), + my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), + my_size(0), my_max_height(0) + { + internal_copy(other); + __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); + } + + concurrent_skip_list( const concurrent_skip_list& other, const allocator_type& alloc ) + : my_node_allocator(alloc), my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), + my_size(0), my_max_height(0) + { + internal_copy(other); + __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); + } + + concurrent_skip_list( concurrent_skip_list&& other ) + : my_node_allocator(std::move(other.my_node_allocator)), my_compare(other.my_compare), + my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) // my_head_ptr would be stored in internal_move + { + internal_move(std::move(other)); + } + + concurrent_skip_list( concurrent_skip_list&& other, const allocator_type& alloc ) + : my_node_allocator(alloc), my_compare(other.my_compare), + my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) + { + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_construct_with_allocator(std::move(other), is_always_equal()); + } + + ~concurrent_skip_list() { + clear(); + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + if (head != nullptr) { + delete_node(head); + } + } + + concurrent_skip_list& operator=( const concurrent_skip_list& 
other ) { + if (this != &other) { + clear(); + copy_assign_allocators(my_node_allocator, other.my_node_allocator); + my_compare = other.my_compare; + my_rng = other.my_rng; + internal_copy(other); + } + return *this; + } + + concurrent_skip_list& operator=( concurrent_skip_list&& other ) { + if (this != &other) { + clear(); + my_compare = std::move(other.my_compare); + my_rng = std::move(other.my_rng); + + move_assign_allocators(my_node_allocator, other.my_node_allocator); + using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; + using is_always_equal = typename node_allocator_traits::is_always_equal; + internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); + } + return *this; + } + + concurrent_skip_list& operator=( std::initializer_list<value_type> il ) + { + clear(); + insert(il.begin(),il.end()); + return *this; + } + + std::pair<iterator, bool> insert( const value_type& value ) { + return internal_insert(value); + } + + std::pair<iterator, bool> insert( value_type&& value ) { + return internal_insert(std::move(value)); + } + + iterator insert( const_iterator, const_reference value ) { + // Ignore hint + return insert(value).first; + } + + iterator insert( const_iterator, value_type&& value ) { + // Ignore hint + return insert(std::move(value)).first; + } + + template<typename InputIterator> + void insert( InputIterator first, InputIterator last ) { + while (first != last) { + insert(*first); + ++first; + } + } + + void insert( std::initializer_list<value_type> init ) { + insert(init.begin(), init.end()); + } + + std::pair<iterator, bool> insert( node_type&& nh ) { + if (!nh.empty()) { + auto insert_node = node_handle_accessor::get_node_ptr(nh); + std::pair<iterator, bool> insert_result = internal_insert_node(insert_node); + if (insert_result.second) { + node_handle_accessor::deactivate(nh); + } + return insert_result; + } + return std::pair<iterator, bool>(end(), false); + } + + iterator insert( const_iterator, node_type&& nh ) { + // Ignore hint + return insert(std::move(nh)).first; + } + + template<typename... Args> + std::pair<iterator, bool> emplace( Args&&... args ) { + return internal_insert(std::forward<Args>(args)...); + } + + template<typename... Args> + iterator emplace_hint( const_iterator, Args&&... args ) { + // Ignore hint + return emplace(std::forward<Args>(args)...).first; + } + + iterator unsafe_erase( iterator pos ) { + std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); + if (extract_result.first) { // node was extracted + delete_value_node(extract_result.first); + return extract_result.second; + } + return end(); + } + + iterator unsafe_erase( const_iterator pos ) { + return unsafe_erase(get_iterator(pos)); + } + + iterator unsafe_erase( const_iterator first, const_iterator last ) { + while (first != last) { + // Unsafe erase returns the iterator which follows the erased one + first = unsafe_erase(first); + } + return get_iterator(first); + } + + size_type unsafe_erase( const key_type& key ) { + return internal_erase(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + size_type>::type unsafe_erase( const K& key ) + { + return internal_erase(key); + } + + node_type unsafe_extract( const_iterator pos ) { + std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); + return extract_result.first ? 
node_handle_accessor::construct<node_type>(extract_result.first) : node_type(); + } + + node_type unsafe_extract( iterator pos ) { + return unsafe_extract(const_iterator(pos)); + } + + node_type unsafe_extract( const key_type& key ) { + return unsafe_extract(find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + node_type>::type unsafe_extract( const K& key ) + { + return unsafe_extract(find(key)); + } + + iterator lower_bound( const key_type& key ) { + return iterator(internal_get_bound(key, my_compare)); + } + + const_iterator lower_bound( const key_type& key ) const { + return const_iterator(internal_get_bound(key, my_compare)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type lower_bound( const K& key ) { + return iterator(internal_get_bound(key, my_compare)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type lower_bound( const K& key ) const { + return const_iterator(internal_get_bound(key, my_compare)); + } + + iterator upper_bound( const key_type& key ) { + return iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + const_iterator upper_bound( const key_type& key ) const { + return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type upper_bound( const K& key ) { + return iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type upper_bound( const K& key ) const { + return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + iterator find( const key_type& key ) { + return iterator(internal_find(key)); + } + + const_iterator find( const key_type& key ) const { + return const_iterator(internal_find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { + return iterator(internal_find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { + return const_iterator(internal_find(key)); + } + + size_type count( const key_type& key ) const { + return internal_count(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { + return internal_count(key); + } + + bool contains( const key_type& key ) const { + return find(key) != end(); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { + return find(key) != end(); + } + + void clear() noexcept { + // clear is not thread safe - load can be relaxed + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + + if (head == nullptr) return; // Head is not allocated => container is empty + + node_ptr current = head->next(0); + + // Delete all value nodes in the container + while (current) { + node_ptr next = current->next(0); + delete_value_node(current); + current = next; + } + + for (size_type level = 0; level < head->height(); ++level) { + head->set_next(level, nullptr); + } + + my_size.store(0, std::memory_order_relaxed); + my_max_height.store(0, std::memory_order_relaxed); + } + + iterator begin() { + return 
iterator(internal_begin()); + } + + const_iterator begin() const { + return const_iterator(internal_begin()); + } + + const_iterator cbegin() const { + return const_iterator(internal_begin()); + } + + iterator end() { + return iterator(nullptr); + } + + const_iterator end() const { + return const_iterator(nullptr); + } + + const_iterator cend() const { + return const_iterator(nullptr); + } + + size_type size() const { + return my_size.load(std::memory_order_relaxed); + } + + size_type max_size() const { + return node_allocator_traits::max_size(my_node_allocator); + } + + __TBB_nodiscard bool empty() const { + return 0 == size(); + } + + allocator_type get_allocator() const { + return my_node_allocator; + } + + void swap(concurrent_skip_list& other) { + if (this != &other) { + using pocs_type = typename node_allocator_traits::propagate_on_container_swap; + using is_always_equal = typename node_allocator_traits::is_always_equal; + internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); + } + } + + std::pair<iterator, iterator> equal_range(const key_type& key) { + return internal_equal_range(key); + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { + return internal_equal_range(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { + return internal_equal_range(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { + return internal_equal_range(key); + } + + key_compare key_comp() const { return my_compare; } + + value_compare value_comp() const { return container_traits::value_comp(my_compare); } + + class const_range_type { + public: + using size_type = typename concurrent_skip_list::size_type; + using value_type = typename concurrent_skip_list::value_type; + using iterator = typename concurrent_skip_list::const_iterator; + + bool empty() const { + return my_begin.my_node_ptr->next(0) == my_end.my_node_ptr; + } + + bool is_divisible() const { + return my_level != 0 ? 
my_begin.my_node_ptr->next(my_level - 1) != my_end.my_node_ptr : false; + } + + size_type size() const { return std::distance(my_begin, my_end); } + + const_range_type( const_range_type& r, split) + : my_end(r.my_end) { + my_begin = iterator(r.my_begin.my_node_ptr->next(r.my_level - 1)); + my_level = my_begin.my_node_ptr->height(); + r.my_end = my_begin; + } + + const_range_type( const concurrent_skip_list& l) + : my_end(l.end()), my_begin(l.begin()), my_level(my_begin.my_node_ptr->height() ) {} + + iterator begin() const { return my_begin; } + iterator end() const { return my_end; } + size_type grainsize() const { return 1; } + + private: + const_iterator my_end; + const_iterator my_begin; + size_type my_level; + }; // class const_range_type + + class range_type : public const_range_type { + public: + using iterator = typename concurrent_skip_list::iterator; + + range_type(range_type& r, split) : const_range_type(r, split()) {} + range_type(const concurrent_skip_list& l) : const_range_type(l) {} + + iterator begin() const { + node_ptr node = const_range_type::begin().my_node_ptr; + return iterator(node); + } + + iterator end() const { + node_ptr node = const_range_type::end().my_node_ptr; + return iterator(node); + } + }; // class range_type + + range_type range() { return range_type(*this); } + const_range_type range() const { return const_range_type(*this); } + +private: + node_ptr internal_begin() const { + node_ptr head = get_head(); + return head == nullptr ? head : head->next(0); + } + + void internal_move(concurrent_skip_list&& other) { + my_head_ptr.store(other.my_head_ptr.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_head_ptr.store(nullptr, std::memory_order_relaxed); + + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(0, std::memory_order_relaxed); + + my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_max_height.store(0, std::memory_order_relaxed); + } + + void internal_move_construct_with_allocator(concurrent_skip_list&& other, + /*is_always_equal = */std::true_type) { + internal_move(std::move(other)); + } + + void internal_move_construct_with_allocator(concurrent_skip_list&& other, + /*is_always_equal = */std::false_type) { + if (my_node_allocator == other.get_allocator()) { + internal_move(std::move(other)); + } else { + my_size.store(0, std::memory_order_relaxed); + my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); + } + } + + static const key_type& get_key( node_ptr n ) { + __TBB_ASSERT(n, nullptr); + return container_traits::get_key(static_cast<node_ptr>(n)->value()); + } + + template <typename K> + bool found( node_ptr node, const K& key ) const { + return node != nullptr && !my_compare(key, get_key(node)); + } + + template <typename K> + node_ptr internal_find(const K& key) const { + return allow_multimapping ? 
internal_find_multi(key) : internal_find_unique(key); + } + + template <typename K> + node_ptr internal_find_multi( const K& key ) const { + node_ptr prev = get_head(); + if (prev == nullptr) return nullptr; // If the head node is not allocated - exit + + node_ptr curr = nullptr; + node_ptr old_curr = curr; + + for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { + curr = internal_find_position(h - 1, prev, key, my_compare); + + if (curr != old_curr && found(curr, key)) { + return curr; + } + old_curr = curr; + } + return nullptr; + } + + template <typename K> + node_ptr internal_find_unique( const K& key ) const { + const_iterator it = lower_bound(key); + return (it == end() || my_compare(key, container_traits::get_key(*it))) ? nullptr : it.my_node_ptr; + } + + template <typename K> + size_type internal_count( const K& key ) const { + if (allow_multimapping) { + // TODO: reimplement without double traversal + std::pair<const_iterator, const_iterator> r = equal_range(key); + return std::distance(r.first, r.second); + } + return size_type(contains(key) ? 1 : 0); + } + + template <typename K> + std::pair<iterator, iterator> internal_equal_range(const K& key) const { + iterator lb = get_iterator(lower_bound(key)); + auto result = std::make_pair(lb, lb); + + // If the lower bound points to the node with the requested key + if (found(lb.my_node_ptr, key)) { + + if (!allow_multimapping) { + // For unique containers - move the second iterator forward and exit + ++result.second; + } else { + // For multi containers - find the upper bound starting from the lower bound + node_ptr prev = lb.my_node_ptr; + node_ptr curr = nullptr; + not_greater_compare cmp(my_compare); + + // Start from the lower bound of the range + for (size_type h = prev->height(); h > 0; --h) { + curr = prev->next(h - 1); + while (curr && cmp(get_key(curr), key)) { + prev = curr; + // If the height of the next node is greater than the current one - jump to its height + if (h < curr->height()) { + h = curr->height(); + } + curr = prev->next(h - 1); + } + } + result.second = iterator(curr); + } + } + + return result; + } + + // Finds position on the level using comparator cmp starting from the node prev + template <typename K, typename Comparator> + node_ptr internal_find_position( size_type level, node_ptr& prev, const K& key, + const Comparator& cmp ) const { + __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); + node_ptr curr = prev->next(level); + + while (curr && cmp(get_key(curr), key)) { + prev = curr; + __TBB_ASSERT(level < prev->height(), nullptr); + curr = prev->next(level); + } + + return curr; + } + + // The same as previous overload, but allows index_number comparison + template <typename Comparator> + node_ptr internal_find_position( size_type level, node_ptr& prev, node_ptr node, + const Comparator& cmp ) const { + __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); + node_ptr curr = prev->next(level); + + while (curr && cmp(get_key(curr), get_key(node))) { + if (allow_multimapping && cmp(get_key(node), get_key(curr)) && curr->index_number() > node->index_number()) { + break; + } + + prev = curr; + __TBB_ASSERT(level < prev->height(), nullptr); + curr = prev->next(level); + } + return curr; + } + + template <typename Comparator> + void fill_prev_curr_arrays(array_type& prev_nodes, array_type& curr_nodes, node_ptr node, const key_type& key, + const Comparator& cmp, node_ptr head ) { + + size_type curr_max_height = 
my_max_height.load(std::memory_order_acquire); + size_type node_height = node->height(); + if (curr_max_height < node_height) { + std::fill(prev_nodes.begin() + curr_max_height, prev_nodes.begin() + node_height, head); + std::fill(curr_nodes.begin() + curr_max_height, curr_nodes.begin() + node_height, nullptr); + } + + node_ptr prev = head; + for (size_type level = curr_max_height; level > 0; --level) { + node_ptr curr = internal_find_position(level - 1, prev, key, cmp); + prev_nodes[level - 1] = prev; + curr_nodes[level - 1] = curr; + } + } + + void fill_prev_array_for_existing_node( array_type& prev_nodes, node_ptr node ) { + node_ptr head = create_head_if_necessary(); + prev_nodes.fill(head); + + node_ptr prev = head; + for (size_type level = node->height(); level > 0; --level) { + while (prev->next(level - 1) != node) { + prev = prev->next(level - 1); + } + prev_nodes[level - 1] = prev; + } + } + + struct not_greater_compare { + const key_compare& my_less_compare; + + not_greater_compare( const key_compare& less_compare ) : my_less_compare(less_compare) {} + + template <typename K1, typename K2> + bool operator()( const K1& first, const K2& second ) const { + return !my_less_compare(second, first); + } + }; + + not_greater_compare select_comparator( /*allow_multimapping = */ std::true_type ) { + return not_greater_compare(my_compare); + } + + key_compare select_comparator( /*allow_multimapping = */ std::false_type ) { + return my_compare; + } + + template<typename... Args> + std::pair<iterator, bool> internal_insert( Args&&... args ) { + node_ptr new_node = create_value_node(std::forward<Args>(args)...); + std::pair<iterator, bool> insert_result = internal_insert_node(new_node); + if (!insert_result.second) { + delete_value_node(new_node); + } + return insert_result; + } + + std::pair<iterator, bool> internal_insert_node( node_ptr new_node ) { + array_type prev_nodes; + array_type curr_nodes; + size_type new_height = new_node->height(); + auto compare = select_comparator(std::integral_constant<bool, allow_multimapping>{}); + + node_ptr head_node = create_head_if_necessary(); + + for (;;) { + fill_prev_curr_arrays(prev_nodes, curr_nodes, new_node, get_key(new_node), compare, head_node); + + node_ptr prev = prev_nodes[0]; + node_ptr next = curr_nodes[0]; + + if (allow_multimapping) { + new_node->set_index_number(prev->index_number() + 1); + } else { + if (found(next, get_key(new_node))) { + return std::pair<iterator, bool>(iterator(next), false); + } + } + + new_node->set_next(0, next); + if (!prev->atomic_next(0).compare_exchange_strong(next, new_node)) { + continue; + } + + // If the node was successfully linked on the first level - it will be linked on other levels + // Insertion cannot fail starting from this point + + // If the height of inserted node is greater than maximum - increase maximum + size_type max_height = my_max_height.load(std::memory_order_acquire); + for (;;) { + if (new_height <= max_height || my_max_height.compare_exchange_strong(max_height, new_height)) { + // If the maximum was successfully updated by current thread + // or by an other thread for the value, greater or equal to new_height + break; + } + } + + for (std::size_t level = 1; level < new_height; ++level) { + // Link the node on upper levels + for (;;) { + prev = prev_nodes[level]; + next = static_cast<node_ptr>(curr_nodes[level]); + + new_node->set_next(level, next); + __TBB_ASSERT(new_node->height() > level, "Internal structure break"); + if (prev->atomic_next(level).compare_exchange_strong(next, 
new_node)) { + break; + } + + for (size_type lev = level; lev != new_height; ++lev ) { + curr_nodes[lev] = internal_find_position(lev, prev_nodes[lev], new_node, compare); + } + } + } + ++my_size; + return std::pair<iterator, bool>(iterator(new_node), true); + } + } + + template <typename K, typename Comparator> + node_ptr internal_get_bound( const K& key, const Comparator& cmp ) const { + node_ptr prev = get_head(); + if (prev == nullptr) return nullptr; // If the head node is not allocated - exit + + node_ptr curr = nullptr; + + for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { + curr = internal_find_position(h - 1, prev, key, cmp); + } + + return curr; + } + + template <typename K> + size_type internal_erase( const K& key ) { + auto eq = equal_range(key); + size_type old_size = size(); + unsafe_erase(eq.first, eq.second); + return old_size - size(); + } + + // Returns node_ptr to the extracted node and node_ptr to the next node after the extracted + std::pair<node_ptr, node_ptr> internal_extract( const_iterator it ) { + std::pair<node_ptr, node_ptr> result(nullptr, nullptr); + if ( it != end() ) { + array_type prev_nodes; + + node_ptr erase_node = it.my_node_ptr; + node_ptr next_node = erase_node->next(0); + fill_prev_array_for_existing_node(prev_nodes, erase_node); + + for (size_type level = 0; level < erase_node->height(); ++level) { + prev_nodes[level]->set_next(level, erase_node->next(level)); + erase_node->set_next(level, nullptr); + } + my_size.fetch_sub(1, std::memory_order_relaxed); + + result.first = erase_node; + result.second = next_node; + } + return result; + } + +protected: + template<typename SourceType> + void internal_merge( SourceType&& source ) { + using source_type = typename std::decay<SourceType>::type; + using source_iterator = typename source_type::iterator; + static_assert((std::is_same<node_type, typename source_type::node_type>::value), "Incompatible containers cannot be merged"); + + for (source_iterator it = source.begin(); it != source.end();) { + source_iterator where = it++; + if (allow_multimapping || !contains(container_traits::get_key(*where))) { + node_type handle = source.unsafe_extract(where); + __TBB_ASSERT(!handle.empty(), "Extracted handle in merge is empty"); + + if (!insert(std::move(handle)).second) { + //If the insertion fails - return the node into source + source.insert(std::move(handle)); + } + __TBB_ASSERT(handle.empty(), "Node handle should be empty after the insertion"); + } + } + } + +private: + void internal_copy( const concurrent_skip_list& other ) { + internal_copy(other.begin(), other.end()); + } + + template<typename Iterator> + void internal_copy( Iterator first, Iterator last ) { + try_call([&] { + for (auto it = first; it != last; ++it) { + insert(*it); + } + }).on_exception([&] { + clear(); + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + if (head != nullptr) { + delete_node(head); + } + }); + } + + static size_type calc_node_size( size_type height ) { + static_assert(alignof(list_node_type) >= alignof(typename list_node_type::atomic_node_ptr), "Incorrect alignment"); + return sizeof(list_node_type) + height * sizeof(typename list_node_type::atomic_node_ptr); + } + + node_ptr create_node( size_type height ) { + size_type sz = calc_node_size(height); + node_ptr node = reinterpret_cast<node_ptr>(node_allocator_traits::allocate(my_node_allocator, sz)); + node_allocator_traits::construct(my_node_allocator, node, height, my_node_allocator); + return node; + } + + template <typename... 
Args> + node_ptr create_value_node( Args&&... args ) { + node_ptr node = create_node(my_rng()); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + delete_node(node); + }); + + // Construct the value inside the node + node_allocator_traits::construct(my_node_allocator, node->storage(), std::forward<Args>(args)...); + value_guard.dismiss(); + return node; + } + + node_ptr create_head_node() { + return create_node(max_level); + } + + void delete_node( node_ptr node ) { + size_type sz = calc_node_size(node->height()); + + // Destroy the node + node_allocator_traits::destroy(my_node_allocator, node); + // Deallocate the node + node_allocator_traits::deallocate(my_node_allocator, reinterpret_cast<std::uint8_t*>(node), sz); + } + + void delete_value_node( node_ptr node ) { + // Destroy the value inside the node + node_allocator_traits::destroy(my_node_allocator, node->storage()); + delete_node(node); + } + + node_ptr get_head() const { + return my_head_ptr.load(std::memory_order_acquire); + } + + node_ptr create_head_if_necessary() { + node_ptr current_head = get_head(); + if (current_head == nullptr) { + // Head node was not created - create it + node_ptr new_head = create_head_node(); + if (my_head_ptr.compare_exchange_strong(current_head, new_head)) { + current_head = new_head; + } else { + // If an other thread has already created the head node - destroy new_head + // current_head now points to the actual head node + delete_node(new_head); + } + } + __TBB_ASSERT(my_head_ptr.load(std::memory_order_relaxed) != nullptr, nullptr); + __TBB_ASSERT(current_head != nullptr, nullptr); + return current_head; + } + + static iterator get_iterator( const_iterator it ) { + return iterator(it.my_node_ptr); + } + + void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::true_type ) { + internal_move(std::move(other)); + } + + void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::false_type ) { + if (my_node_allocator == other.my_node_allocator) { + internal_move(std::move(other)); + } else { + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); + } + } + + void internal_swap_fields( concurrent_skip_list& other ) { + using std::swap; + swap_allocators(my_node_allocator, other.my_node_allocator); + swap(my_compare, other.my_compare); + swap(my_rng, other.my_rng); + + swap_atomics_relaxed(my_head_ptr, other.my_head_ptr); + swap_atomics_relaxed(my_size, other.my_size); + swap_atomics_relaxed(my_max_height, other.my_max_height); + } + + void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::true_type ) { + internal_swap_fields(other); + } + + void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::false_type ) { + __TBB_ASSERT(my_node_allocator == other.my_node_allocator, "Swapping with unequal allocators is not allowed"); + internal_swap_fields(other); + } + + node_allocator_type my_node_allocator; + key_compare my_compare; + random_level_generator_type my_rng; + std::atomic<list_node_type*> my_head_ptr; + std::atomic<size_type> my_size; + std::atomic<size_type> my_max_height; + + template<typename OtherTraits> + friend class concurrent_skip_list; +}; // class concurrent_skip_list + +template <typename Traits> +bool operator==( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + if (lhs.size() != rhs.size()) return false; 
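For context, this class template is consumed through the public wrappers declared in oneapi/tbb/concurrent_map.h and oneapi/tbb/concurrent_set.h (see the #error guard at the top of the header). The following usage sketch is written against that public interface; it assumes an installed oneTBB and is not part of this patch. Inserts and lookups may run concurrently, while the unsafe_* members need external synchronization.

#include <oneapi/tbb/concurrent_map.h>
#include <oneapi/tbb/parallel_for.h>
#include <string>

int main() {
    tbb::concurrent_map<int, std::string> m;

    // Thread-safe insertion from a parallel loop.
    tbb::parallel_for(0, 1000, [&](int i) {
        m.emplace(i, std::to_string(i));
    });

    // Thread-safe lookups.
    bool found = m.contains(42);
    auto eq = m.equal_range(42);   // at most one element for the unique map

    // Not thread-safe: must not run concurrently with other operations.
    m.unsafe_erase(42);

    return (found && eq.first != eq.second) ? 0 : 1;
}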
+#if _MSC_VER + // Passing "unchecked" iterators to std::equal with 3 parameters + // causes compiler warnings. + // The workaround is to use overload with 4 parameters, which is + // available since C++14 - minimally supported version on MSVC + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +#else + return std::equal(lhs.begin(), lhs.end(), rhs.begin()); +#endif +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Traits> +bool operator!=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(lhs == rhs); +} +#endif + +#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT +template <typename Traits> +tbb::detail::synthesized_three_way_result<typename Traits::value_type> +operator<=>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), + rhs.begin(), rhs.end(), + tbb::detail::synthesized_three_way_comparator{}); +} +#else +template <typename Traits> +bool operator<( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +template <typename Traits> +bool operator>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return rhs < lhs; +} + +template <typename Traits> +bool operator<=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(rhs < lhs); +} + +template <typename Traits> +bool operator>=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(lhs < rhs); +} +#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT + +// Generates a number from the interval [0, MaxLevel). +template <std::size_t MaxLevel> +class concurrent_geometric_level_generator { +public: + static constexpr std::size_t max_level = MaxLevel; + // TODO: modify the algorithm to accept other values of max_level + static_assert(max_level == 32, "Incompatible max_level for rng"); + + concurrent_geometric_level_generator() : engines(std::minstd_rand::result_type(time(nullptr))) {} + + std::size_t operator()() { + // +1 is required to pass at least 1 into log2 (log2(0) is undefined) + // -1 is required to have an ability to return 0 from the generator (max_level - log2(2^31) - 1) + std::size_t result = max_level - std::size_t(tbb::detail::log2(engines.local()() + 1)) - 1; + __TBB_ASSERT(result <= max_level, nullptr); + return result; + } + +private: + tbb::enumerable_thread_specific<std::minstd_rand> engines; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +#endif // __TBB_detail__concurrent_skip_list_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h index 3abcce2b29..b81169aaa1 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h @@ -1,1500 +1,1500 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__concurrent_unordered_base_H -#define __TBB_detail__concurrent_unordered_base_H - -#if !defined(__TBB_concurrent_unordered_map_H) && !defined(__TBB_concurrent_unordered_set_H) -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "_range_common.h" -#include "_containers_helpers.h" -#include "_segment_table.h" -#include "_hash_compare.h" -#include "_allocator_traits.h" -#include "_node_handle.h" -#include "_assert.h" -#include "_utils.h" -#include "_exception.h" -#include <iterator> -#include <utility> -#include <functional> -#include <initializer_list> -#include <atomic> -#include <type_traits> -#include <memory> -#include <algorithm> - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(push) -#pragma warning(disable: 4127) // warning C4127: conditional expression is constant -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Traits> -class concurrent_unordered_base; - -template<typename Container, typename Value> -class solist_iterator { -private: - using node_ptr = typename Container::value_node_ptr; - template <typename T, typename Allocator> - friend class split_ordered_list; - template<typename M, typename V> - friend class solist_iterator; - template <typename Traits> - friend class concurrent_unordered_base; - template<typename M, typename T, typename U> - friend bool operator==( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); - template<typename M, typename T, typename U> - friend bool operator!=( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); -public: - using value_type = Value; - using difference_type = typename Container::difference_type; - using pointer = value_type*; - using reference = value_type&; - using iterator_category = std::forward_iterator_tag; - - solist_iterator() : my_node_ptr(nullptr) {} - solist_iterator( const solist_iterator<Container, typename Container::value_type>& other ) - : my_node_ptr(other.my_node_ptr) {} - - solist_iterator& operator=( const solist_iterator<Container, typename Container::value_type>& other ) { - my_node_ptr = other.my_node_ptr; - return *this; - } - - reference operator*() const { - return my_node_ptr->value(); - } - - pointer operator->() const { - return my_node_ptr->storage(); - } - - solist_iterator& operator++() { - auto next_node = my_node_ptr->next(); - while(next_node && next_node->is_dummy()) { - next_node = next_node->next(); - } - my_node_ptr = static_cast<node_ptr>(next_node); - return *this; - } - - solist_iterator operator++(int) { - solist_iterator tmp = *this; - ++*this; - return tmp; - } - -private: - solist_iterator( node_ptr pnode ) : my_node_ptr(pnode) {} - - node_ptr get_node_ptr() const { return my_node_ptr; } - - node_ptr my_node_ptr; -}; - -template<typename Solist, typename T, typename U> -bool operator==( const solist_iterator<Solist, T>& i, const solist_iterator<Solist, U>& j ) { - return i.my_node_ptr == j.my_node_ptr; -} - -template<typename Solist, typename T, typename U> -bool operator!=( const solist_iterator<Solist, T>& i, const 
solist_iterator<Solist, U>& j ) { - return i.my_node_ptr != j.my_node_ptr; -} - -template <typename SokeyType> -class list_node { -public: - using node_ptr = list_node*; - using sokey_type = SokeyType; - - list_node(sokey_type key) : my_next(nullptr), my_order_key(key) {} - - void init( sokey_type key ) { - my_order_key = key; - } - - sokey_type order_key() const { - return my_order_key; - } - - bool is_dummy() { - // The last bit of order key is unset for dummy nodes - return (my_order_key & 0x1) == 0; - } - - node_ptr next() const { - return my_next.load(std::memory_order_acquire); - } - - void set_next( node_ptr next_node ) { - my_next.store(next_node, std::memory_order_release); - } - - bool try_set_next( node_ptr expected_next, node_ptr new_next ) { - return my_next.compare_exchange_strong(expected_next, new_next); - } - -private: - std::atomic<node_ptr> my_next; - sokey_type my_order_key; -}; // class list_node - -template <typename ValueType, typename SokeyType> -class value_node : public list_node<SokeyType> -{ -public: - using base_type = list_node<SokeyType>; - using sokey_type = typename base_type::sokey_type; - using value_type = ValueType; - - value_node( sokey_type ord_key ) : base_type(ord_key) {} - ~value_node() {} - value_type* storage() { - return reinterpret_cast<value_type*>(&my_value); - } - - value_type& value() { - return *storage(); - } - -private: - using aligned_storage_type = typename std::aligned_storage<sizeof(value_type)>::type; - aligned_storage_type my_value; -}; // class value_node - -template <typename Traits> -class concurrent_unordered_base { - using self_type = concurrent_unordered_base<Traits>; - using traits_type = Traits; - using hash_compare_type = typename traits_type::hash_compare_type; - class unordered_segment_table; -public: - using value_type = typename traits_type::value_type; - using key_type = typename traits_type::key_type; - using allocator_type = typename traits_type::allocator_type; - -private: - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - // TODO: check assert conditions for different C++ standards - static_assert(std::is_same<typename allocator_traits_type::value_type, value_type>::value, - "value_type of the container must be the same as its allocator"); - using sokey_type = std::size_t; - -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using iterator = solist_iterator<self_type, value_type>; - using const_iterator = solist_iterator<self_type, const value_type>; - using local_iterator = iterator; - using const_local_iterator = const_iterator; - - using reference = value_type&; - using const_reference = const value_type&; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - using hasher = typename hash_compare_type::hasher; - using key_equal = typename hash_compare_type::key_equal; - -private: - using list_node_type = list_node<sokey_type>; - using value_node_type = value_node<value_type, sokey_type>; - using node_ptr = list_node_type*; - using value_node_ptr = value_node_type*; - - using value_node_allocator_type = typename allocator_traits_type::template rebind_alloc<value_node_type>; - using node_allocator_type = typename allocator_traits_type::template rebind_alloc<list_node_type>; - - using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; - using value_node_allocator_traits = tbb::detail::allocator_traits<value_node_allocator_type>; - - static 
constexpr size_type round_up_to_power_of_two( size_type bucket_count ) { - return size_type(1) << size_type(tbb::detail::log2(uintptr_t(bucket_count == 0 ? 1 : bucket_count) * 2 - 1)); - } - - template <typename T> - using is_transparent = dependent_bool<has_transparent_key_equal<key_type, hasher, key_equal>, T>; -public: - using node_type = node_handle<key_type, value_type, value_node_type, allocator_type>; - - explicit concurrent_unordered_base( size_type bucket_count, const hasher& hash = hasher(), - const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) - : my_size(0), - my_bucket_count(round_up_to_power_of_two(bucket_count)), - my_max_load_factor(float(initial_max_load_factor)), - my_hash_compare(hash, equal), - my_head(sokey_type(0)), - my_segments(alloc) {} - - concurrent_unordered_base() : concurrent_unordered_base(initial_bucket_count) {} - - concurrent_unordered_base( size_type bucket_count, const allocator_type& alloc ) - : concurrent_unordered_base(bucket_count, hasher(), key_equal(), alloc) {} - - concurrent_unordered_base( size_type bucket_count, const hasher& hash, const allocator_type& alloc ) - : concurrent_unordered_base(bucket_count, hash, key_equal(), alloc) {} - - explicit concurrent_unordered_base( const allocator_type& alloc ) - : concurrent_unordered_base(initial_bucket_count, hasher(), key_equal(), alloc) {} - - template <typename InputIterator> - concurrent_unordered_base( InputIterator first, InputIterator last, - size_type bucket_count = initial_bucket_count, const hasher& hash = hasher(), - const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) - : concurrent_unordered_base(bucket_count, hash, equal, alloc) - { - insert(first, last); - } - - template <typename InputIterator> - concurrent_unordered_base( InputIterator first, InputIterator last, - size_type bucket_count, const allocator_type& alloc ) - : concurrent_unordered_base(first, last, bucket_count, hasher(), key_equal(), alloc) {} - - template <typename InputIterator> - concurrent_unordered_base( InputIterator first, InputIterator last, - size_type bucket_count, const hasher& hash, const allocator_type& alloc ) - : concurrent_unordered_base(first, last, bucket_count, hash, key_equal(), alloc) {} - - concurrent_unordered_base( const concurrent_unordered_base& other ) - : my_size(other.my_size.load(std::memory_order_relaxed)), - my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), - my_max_load_factor(other.my_max_load_factor), - my_hash_compare(other.my_hash_compare), - my_head(other.my_head.order_key()), - my_segments(other.my_segments) - { - try_call( [&] { - internal_copy(other); - } ).on_exception( [&] { - clear(); - }); - } - - concurrent_unordered_base( const concurrent_unordered_base& other, const allocator_type& alloc ) - : my_size(other.my_size.load(std::memory_order_relaxed)), - my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), - my_max_load_factor(other.my_max_load_factor), - my_hash_compare(other.my_hash_compare), - my_head(other.my_head.order_key()), - my_segments(other.my_segments, alloc) - { - try_call( [&] { - internal_copy(other); - } ).on_exception( [&] { - clear(); - }); - } - - concurrent_unordered_base( concurrent_unordered_base&& other ) - : my_size(other.my_size.load(std::memory_order_relaxed)), - my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), - my_max_load_factor(std::move(other.my_max_load_factor)), - my_hash_compare(std::move(other.my_hash_compare)), - 
my_head(other.my_head.order_key()), - my_segments(std::move(other.my_segments)) - { - move_content(std::move(other)); - } - - concurrent_unordered_base( concurrent_unordered_base&& other, const allocator_type& alloc ) - : my_size(other.my_size.load(std::memory_order_relaxed)), - my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), - my_max_load_factor(std::move(other.my_max_load_factor)), - my_hash_compare(std::move(other.my_hash_compare)), - my_head(other.my_head.order_key()), - my_segments(std::move(other.my_segments), alloc) - { - using is_always_equal = typename allocator_traits_type::is_always_equal; - internal_move_construct_with_allocator(std::move(other), alloc, is_always_equal()); - } - - concurrent_unordered_base( std::initializer_list<value_type> init, - size_type bucket_count = initial_bucket_count, - const hasher& hash = hasher(), const key_equal& equal = key_equal(), - const allocator_type& alloc = allocator_type() ) - : concurrent_unordered_base(init.begin(), init.end(), bucket_count, hash, equal, alloc) {} - - concurrent_unordered_base( std::initializer_list<value_type> init, - size_type bucket_count, const allocator_type& alloc ) - : concurrent_unordered_base(init, bucket_count, hasher(), key_equal(), alloc) {} - - concurrent_unordered_base( std::initializer_list<value_type> init, - size_type bucket_count, const hasher& hash, const allocator_type& alloc ) - : concurrent_unordered_base(init, bucket_count, hash, key_equal(), alloc) {} - - ~concurrent_unordered_base() { - internal_clear(); - } - - concurrent_unordered_base& operator=( const concurrent_unordered_base& other ) { - if (this != &other) { - clear(); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_max_load_factor = other.my_max_load_factor; - my_hash_compare = other.my_hash_compare; - my_segments = other.my_segments; - internal_copy(other); // TODO: guards for exceptions? 
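The constructors above normalize any requested bucket count to a power of two via round_up_to_power_of_two. A minimal standalone sketch of that rounding, assuming a plain-loop floor_log2 in place of tbb::detail::log2 (the helper names here are illustrative, not the header's):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Index of the highest set bit; stands in for tbb::detail::log2.
inline std::size_t floor_log2(std::uint64_t x) {
    std::size_t result = 0;
    while (x >>= 1) ++result;
    return result;
}

// Smallest power of two that is >= bucket_count (0 is treated as 1),
// mirroring round_up_to_power_of_two in the header above.
inline std::size_t round_up_pow2(std::size_t bucket_count) {
    return std::size_t(1) << floor_log2(std::uint64_t(bucket_count == 0 ? 1 : bucket_count) * 2 - 1);
}

int main() {
    assert(round_up_pow2(0) == 1);
    assert(round_up_pow2(5) == 8);
    assert(round_up_pow2(8) == 8);
    assert(round_up_pow2(9) == 16);
}

Keeping the bucket count a power of two lets the container map a hash to a bucket with a cheap modulo and lets buckets split cleanly when the count doubles.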
- } - return *this; - } - - concurrent_unordered_base& operator=( concurrent_unordered_base&& other ) noexcept(unordered_segment_table::is_noexcept_assignment) { - if (this != &other) { - clear(); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_max_load_factor = std::move(other.my_max_load_factor); - my_hash_compare = std::move(other.my_hash_compare); - my_segments = std::move(other.my_segments); - - using pocma_type = typename allocator_traits_type::propagate_on_container_move_assignment; - using is_always_equal = typename allocator_traits_type::is_always_equal; - internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); - } - return *this; - } - - concurrent_unordered_base& operator=( std::initializer_list<value_type> init ) { - clear(); - insert(init); - return *this; - } - - void swap( concurrent_unordered_base& other ) noexcept(unordered_segment_table::is_noexcept_swap) { - if (this != &other) { - using pocs_type = typename allocator_traits_type::propagate_on_container_swap; - using is_always_equal = typename allocator_traits_type::is_always_equal; - internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); - } - } - - allocator_type get_allocator() const noexcept { return my_segments.get_allocator(); } - - iterator begin() noexcept { return iterator(first_value_node(&my_head)); } - const_iterator begin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } - const_iterator cbegin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } - - iterator end() noexcept { return iterator(nullptr); } - const_iterator end() const noexcept { return const_iterator(nullptr); } - const_iterator cend() const noexcept { return const_iterator(nullptr); } - - __TBB_nodiscard bool empty() const noexcept { return size() == 0; } - size_type size() const noexcept { return my_size.load(std::memory_order_relaxed); } - size_type max_size() const noexcept { return allocator_traits_type::max_size(get_allocator()); } - - void clear() noexcept { - internal_clear(); - } - - std::pair<iterator, bool> insert( const value_type& value ) { - return internal_insert_value(value); - } - - std::pair<iterator, bool> insert( value_type&& value ) { - return internal_insert_value(std::move(value)); - } - - iterator insert( const_iterator, const value_type& value ) { - // Ignore hint - return insert(value).first; - } - - iterator insert( const_iterator, value_type&& value ) { - // Ignore hint - return insert(std::move(value)).first; - } - - template <typename InputIterator> - void insert( InputIterator first, InputIterator last ) { - for (; first != last; ++first) { - insert(*first); - } - } - - void insert( std::initializer_list<value_type> init ) { - insert(init.begin(), init.end()); - } - - std::pair<iterator, bool> insert( node_type&& nh ) { - if (!nh.empty()) { - value_node_ptr insert_node = node_handle_accessor::get_node_ptr(nh); - auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { - insert_node->init(order_key); - return insert_node; - }; - auto insert_result = internal_insert(insert_node->value(), init_node); - if (insert_result.inserted) { - // If the insertion succeeded - set node handle to the empty state - __TBB_ASSERT(insert_result.remaining_node == nullptr, - "internal_insert_node should not return the 
remaining node if the insertion succeeded"); - node_handle_accessor::deactivate(nh); - } - return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; - } - return {end(), false}; - } - - iterator insert( const_iterator, node_type&& nh ) { - // Ignore hint - return insert(std::move(nh)).first; - } - - template <typename... Args> - std::pair<iterator, bool> emplace( Args&&... args ) { - // Create a node with temporary order_key 0, which will be reinitialize - // in internal_insert after the hash calculation - value_node_ptr insert_node = create_node(0, std::forward<Args>(args)...); - - auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { - insert_node->init(order_key); - return insert_node; - }; - - auto insert_result = internal_insert(insert_node->value(), init_node); - - if (!insert_result.inserted) { - // If the insertion failed - destroy the node which was created - insert_node->init(split_order_key_regular(1)); - destroy_node(insert_node); - } - - return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; - } - - template <typename... Args> - iterator emplace_hint( const_iterator, Args&&... args ) { - // Ignore hint - return emplace(std::forward<Args>(args)...).first; - } - - iterator unsafe_erase( const_iterator pos ) { - return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); - } - - iterator unsafe_erase( iterator pos ) { - return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); - } - - iterator unsafe_erase( const_iterator first, const_iterator last ) { - while(first != last) { - first = unsafe_erase(first); - } - return iterator(first.get_node_ptr()); - } - - size_type unsafe_erase( const key_type& key ) { - return internal_erase_by_key(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value - && !std::is_convertible<K, const_iterator>::value - && !std::is_convertible<K, iterator>::value, - size_type>::type unsafe_erase( const K& key ) - { - return internal_erase_by_key(key); - } - - node_type unsafe_extract( const_iterator pos ) { - internal_extract(pos.get_node_ptr()); - return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); - } - - node_type unsafe_extract( iterator pos ) { - internal_extract(pos.get_node_ptr()); - return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); - } - - node_type unsafe_extract( const key_type& key ) { - iterator item = find(key); - return item == end() ? node_type() : unsafe_extract(item); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value - && !std::is_convertible<K, const_iterator>::value - && !std::is_convertible<K, iterator>::value, - node_type>::type unsafe_extract( const K& key ) - { - iterator item = find(key); - return item == end() ? node_type() : unsafe_extract(item); - } - - // Lookup functions - iterator find( const key_type& key ) { - value_node_ptr result = internal_find(key); - return result == nullptr ? end() : iterator(result); - } - - const_iterator find( const key_type& key ) const { - value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); - return result == nullptr ? end() : const_iterator(result); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { - value_node_ptr result = internal_find(key); - return result == nullptr ? 
end() : iterator(result); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { - value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); - return result == nullptr ? end() : const_iterator(result); - } - - std::pair<iterator, iterator> equal_range( const key_type& key ) { - auto result = internal_equal_range(key); - return std::make_pair(iterator(result.first), iterator(result.second)); - } - - std::pair<const_iterator, const_iterator> equal_range( const key_type& key ) const { - auto result = const_cast<self_type*>(this)->internal_equal_range(key); - return std::make_pair(const_iterator(result.first), const_iterator(result.second)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { - auto result = internal_equal_range(key); - return std::make_pair(iterator(result.first), iterator(result.second)); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { - auto result = const_cast<self_type*>(this)->internal_equal_range(key); - return std::make_pair(iterator(result.first), iterator(result.second)); - } - - size_type count( const key_type& key ) const { - return internal_count(key); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { - return internal_count(key); - } - - bool contains( const key_type& key ) const { - return find(key) != end(); - } - - template <typename K> - typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { - return find(key) != end(); - } - - // Bucket interface - local_iterator unsafe_begin( size_type n ) { - return local_iterator(first_value_node(get_bucket(n))); - } - - const_local_iterator unsafe_begin( size_type n ) const { - auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); - return const_local_iterator(bucket_begin); - } - - const_local_iterator unsafe_cbegin( size_type n ) const { - auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); - return const_local_iterator(bucket_begin); - } - - local_iterator unsafe_end( size_type n ) { - size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); - return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : local_iterator(nullptr); - } - - const_local_iterator unsafe_end( size_type n ) const { - size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); - return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); - } - - const_local_iterator unsafe_cend( size_type n ) const { - size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); - return n != bucket_count - 1 ? 
unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); - } - - size_type unsafe_bucket_count() const { return my_bucket_count.load(std::memory_order_relaxed); } - - size_type unsafe_max_bucket_count() const { - return max_size(); - } - - size_type unsafe_bucket_size( size_type n ) const { - return size_type(std::distance(unsafe_begin(n), unsafe_end(n))); - } - - size_type unsafe_bucket( const key_type& key ) const { - return my_hash_compare(key) % my_bucket_count.load(std::memory_order_relaxed); - } - - // Hash policy - float load_factor() const { - return float(size() / float(my_bucket_count.load(std::memory_order_acquire))); - } - - float max_load_factor() const { return my_max_load_factor; } - - void max_load_factor( float mlf ) { - if (mlf != mlf || mlf < 0) { - tbb::detail::throw_exception(exception_id::invalid_load_factor); - } - my_max_load_factor = mlf; - } // TODO: unsafe? - - void rehash( size_type bucket_count ) { - size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); - if (current_bucket_count < bucket_count) { - // TODO: do we need do-while here? - my_bucket_count.compare_exchange_strong(current_bucket_count, round_up_to_power_of_two(bucket_count)); - } - } - - void reserve( size_type elements_count ) { - size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); - size_type necessary_bucket_count = current_bucket_count; - - do { - // TODO: Log2 seems useful here - while (necessary_bucket_count * max_load_factor() < elements_count) { - necessary_bucket_count <<= 1; - } - } while (current_bucket_count >= necessary_bucket_count || - !my_bucket_count.compare_exchange_strong(current_bucket_count, necessary_bucket_count)); - } - - // Observers - hasher hash_function() const { return my_hash_compare.hash_function(); } - key_equal key_eq() const { return my_hash_compare.key_eq(); } - - class const_range_type { - private: - const concurrent_unordered_base& my_instance; - node_ptr my_begin_node; // may be node* const - node_ptr my_end_node; - mutable node_ptr my_midpoint_node; - public: - using size_type = typename concurrent_unordered_base::size_type; - using value_type = typename concurrent_unordered_base::value_type; - using reference = typename concurrent_unordered_base::reference; - using difference_type = typename concurrent_unordered_base::difference_type; - using iterator = typename concurrent_unordered_base::const_iterator; - - bool empty() const { return my_begin_node == my_end_node; } - - bool is_divisible() const { - return my_midpoint_node != my_end_node; - } - - size_type grainsize() const { return 1; } - - const_range_type( const_range_type& range, split ) - : my_instance(range.my_instance), - my_begin_node(range.my_midpoint_node), - my_end_node(range.my_end_node) - { - range.my_end_node = my_begin_node; - __TBB_ASSERT(!empty(), "Splitting despite the range is not divisible"); - __TBB_ASSERT(!range.empty(), "Splitting despite the range is not divisible"); - set_midpoint(); - range.set_midpoint(); - } - - iterator begin() const { return iterator(my_instance.first_value_node(my_begin_node)); } - iterator end() const { return iterator(my_instance.first_value_node(my_end_node)); } - - const_range_type( const concurrent_unordered_base& table ) - : my_instance(table), my_begin_node(const_cast<node_ptr>(&table.my_head)), my_end_node(nullptr) - { - set_midpoint(); - } - private: - void set_midpoint() const { - if (my_begin_node == my_end_node) { - my_midpoint_node = my_end_node; - } else { - sokey_type 
invalid_key = ~sokey_type(0); - sokey_type begin_key = my_begin_node != nullptr ? my_begin_node->order_key() : invalid_key; - sokey_type end_key = my_end_node != nullptr ? my_end_node->order_key() : invalid_key; - - size_type mid_bucket = reverse_bits(begin_key + (end_key - begin_key) / 2) % - my_instance.my_bucket_count.load(std::memory_order_relaxed); - while( my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed) == nullptr) { - mid_bucket = my_instance.get_parent(mid_bucket); - } - if (reverse_bits(mid_bucket) > begin_key) { - // Found a dummy node between begin and end - my_midpoint_node = my_instance.first_value_node( - my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed)); - } else { - // Didn't find a dummy node between begin and end - my_midpoint_node = my_end_node; - } - } - } - }; // class const_range_type - - class range_type : public const_range_type { - public: - using iterator = typename concurrent_unordered_base::iterator; - using const_range_type::const_range_type; - - iterator begin() const { return iterator(const_range_type::begin().get_node_ptr()); } - iterator end() const { return iterator(const_range_type::end().get_node_ptr()); } - }; // class range_type - - // Parallel iteration - range_type range() { - return range_type(*this); - } - - const_range_type range() const { - return const_range_type(*this); - } -protected: - static constexpr bool allow_multimapping = traits_type::allow_multimapping; - -private: - static constexpr size_type initial_bucket_count = 8; - static constexpr float initial_max_load_factor = 4; // TODO: consider 1? - static constexpr size_type pointers_per_embedded_table = sizeof(size_type) * 8 - 1; - - class unordered_segment_table - : public segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table> - { - using self_type = unordered_segment_table; - using atomic_node_ptr = std::atomic<node_ptr>; - using base_type = segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table>; - using segment_type = typename base_type::segment_type; - using base_allocator_type = typename base_type::allocator_type; - - using segment_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_node_ptr>; - using segment_allocator_traits = tbb::detail::allocator_traits<segment_allocator_type>; - public: - // Segment table for unordered containers should not be extended in the wait- free implementation - static constexpr bool allow_table_extending = false; - static constexpr bool is_noexcept_assignment = std::is_nothrow_move_assignable<hasher>::value && - std::is_nothrow_move_assignable<key_equal>::value && - segment_allocator_traits::is_always_equal::value; - static constexpr bool is_noexcept_swap = tbb::detail::is_nothrow_swappable<hasher>::value && - tbb::detail::is_nothrow_swappable<key_equal>::value && - segment_allocator_traits::is_always_equal::value; - - // TODO: using base_type::base_type is not compiling on Windows and Intel Compiler - investigate - unordered_segment_table( const base_allocator_type& alloc = base_allocator_type() ) - : base_type(alloc) {} - - unordered_segment_table( const unordered_segment_table& ) = default; - - unordered_segment_table( const unordered_segment_table& other, const base_allocator_type& alloc ) - : base_type(other, alloc) {} - - unordered_segment_table( unordered_segment_table&& ) = default; - - unordered_segment_table( unordered_segment_table&& other, const base_allocator_type& alloc ) - 
: base_type(std::move(other), alloc) {} - - unordered_segment_table& operator=( const unordered_segment_table& ) = default; - - unordered_segment_table& operator=( unordered_segment_table&& ) = default; - - segment_type create_segment( typename base_type::segment_table_type, typename base_type::segment_index_type segment_index, size_type ) { - segment_allocator_type alloc(this->get_allocator()); - size_type seg_size = this->segment_size(segment_index); - segment_type new_segment = segment_allocator_traits::allocate(alloc, seg_size); - for (size_type i = 0; i != seg_size; ++i) { - segment_allocator_traits::construct(alloc, new_segment + i, nullptr); - } - return new_segment; - } - - // deallocate_segment is required by the segment_table base class, but - // in unordered, it is also necessary to call the destructor during deallocation - void deallocate_segment( segment_type address, size_type index ) { - destroy_segment(address, index); - } - - void destroy_segment( segment_type address, size_type index ) { - segment_allocator_type alloc(this->get_allocator()); - for (size_type i = 0; i != this->segment_size(index); ++i) { - segment_allocator_traits::destroy(alloc, address + i); - } - segment_allocator_traits::deallocate(alloc, address, this->segment_size(index)); - } - - - void copy_segment( size_type index, segment_type, segment_type to ) { - if (index == 0) { - // The first element in the first segment is embedded into the table (my_head) - // so the first pointer should not be stored here - // It would be stored during move ctor/assignment operation - to[1].store(nullptr, std::memory_order_relaxed); - } else { - for (size_type i = 0; i != this->segment_size(index); ++i) { - to[i].store(nullptr, std::memory_order_relaxed); - } - } - } - - void move_segment( size_type index, segment_type from, segment_type to ) { - if (index == 0) { - // The first element in the first segment is embedded into the table (my_head) - // so the first pointer should not be stored here - // It would be stored during move ctor/assignment operation - to[1].store(from[1].load(std::memory_order_relaxed), std::memory_order_relaxed); - } else { - for (size_type i = 0; i != this->segment_size(index); ++i) { - to[i].store(from[i].load(std::memory_order_relaxed), std::memory_order_relaxed); - from[i].store(nullptr, std::memory_order_relaxed); - } - } - } - - // allocate_long_table is required by the segment_table base class, but unused for unordered containers - typename base_type::segment_table_type allocate_long_table( const typename base_type::atomic_segment*, size_type ) { - __TBB_ASSERT(false, "This method should never been called"); - // TableType is a pointer - return nullptr; - } - - // destroy_elements is required by the segment_table base class, but unused for unordered containers - // this function call but do nothing - void destroy_elements() {} - }; // struct unordered_segment_table - - void internal_clear() { - // TODO: consider usefulness of two versions of clear() - with dummy nodes deallocation and without it - node_ptr next = my_head.next(); - node_ptr curr = next; - - my_head.set_next(nullptr); - - while (curr != nullptr) { - next = curr->next(); - destroy_node(curr); - curr = next; - } - - my_size.store(0, std::memory_order_relaxed); - my_segments.clear(); - } - - void destroy_node( node_ptr node ) { - if (node->is_dummy()) { - node_allocator_type dummy_node_allocator(my_segments.get_allocator()); - // Destroy the node - node_allocator_traits::destroy(dummy_node_allocator, node); - // Deallocate the 
memory - node_allocator_traits::deallocate(dummy_node_allocator, node, 1); - } else { - value_node_ptr val_node = static_cast<value_node_ptr>(node); - value_node_allocator_type value_node_allocator(my_segments.get_allocator()); - // Destroy the value - value_node_allocator_traits::destroy(value_node_allocator, val_node->storage()); - // Destroy the node - value_node_allocator_traits::destroy(value_node_allocator, val_node); - // Deallocate the memory - value_node_allocator_traits::deallocate(value_node_allocator, val_node, 1); - } - } - - struct internal_insert_return_type { - // If the insertion failed - the remaining_node points to the node, which was failed to insert - // This node can be allocated in process of insertion - value_node_ptr remaining_node; - // If the insertion failed - node_with_equal_key points to the node in the list with the - // key, equivalent to the inserted, otherwise it points to the node, which was inserted. - value_node_ptr node_with_equal_key; - // Insertion status - // NOTE: if it is true - remaining_node should be nullptr - bool inserted; - }; // struct internal_insert_return_type - - // Inserts the value into the split ordered list - template <typename ValueType> - std::pair<iterator, bool> internal_insert_value( ValueType&& value ) { - - auto create_value_node = [&value, this]( sokey_type order_key )->value_node_ptr { - return create_node(order_key, std::forward<ValueType>(value)); - }; - - auto insert_result = internal_insert(value, create_value_node); - - if (insert_result.remaining_node != nullptr) { - // If the insertion fails - destroy the node which was failed to insert if it exist - __TBB_ASSERT(!insert_result.inserted, - "remaining_node should be nullptr if the node was successfully inserted"); - destroy_node(insert_result.remaining_node); - } - - return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; - } - - // Inserts the node into the split ordered list - // Creates a node using the specified callback after the place for insertion was found - // Returns internal_insert_return_type object, where: - // - If the insertion succeeded: - // - remaining_node is nullptr - // - node_with_equal_key point to the inserted node - // - inserted is true - // - If the insertion failed: - // - remaining_node points to the node, that was failed to insert if it was created. 
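internal_insert, whose contract the comment block above spells out, publishes a new node with a compare-and-swap on the predecessor's next pointer and re-runs the search whenever another thread wins the race. A simplified, self-contained sketch of that retry pattern on a sorted list of atomic links (the types and names here are illustrative stand-ins, not the header's):

#include <atomic>

struct Node {
    int key;
    std::atomic<Node*> next{nullptr};
};

// Insert new_node after head keeping keys sorted; returns false if an equal
// key is already present (mimicking the non-multimap path described above).
bool cas_insert(Node* head, Node* new_node) {
    for (;;) {
        Node* prev = head;
        Node* curr = prev->next.load(std::memory_order_acquire);
        // Search phase: advance while existing keys are smaller than the new key.
        while (curr && curr->key < new_node->key) {
            prev = curr;
            curr = curr->next.load(std::memory_order_acquire);
        }
        if (curr && curr->key == new_node->key) return false;  // key already present
        // Publish phase: link new_node to the observed successor, then try to
        // swing prev->next from that successor to new_node.
        new_node->next.store(curr, std::memory_order_relaxed);
        if (prev->next.compare_exchange_strong(curr, new_node)) return true;
        // CAS failed: another thread changed prev->next; restart the search.
    }
}

Because erasure in the real container is "unsafe" (not allowed concurrently with other operations), this insert-only CAS loop gets by without marked pointers or node reclamation schemes.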
- // nullptr if the node was not created, because the requested key was already - // presented in the list - // - node_with_equal_key point to the element in the list with the key, equivalent to - // to the requested key - // - inserted is false - template <typename ValueType, typename CreateInsertNode> - internal_insert_return_type internal_insert( ValueType&& value, CreateInsertNode create_insert_node ) { - static_assert(std::is_same<typename std::decay<ValueType>::type, value_type>::value, - "Incorrect type in internal_insert"); - const key_type& key = traits_type::get_key(value); - sokey_type hash_key = sokey_type(my_hash_compare(key)); - - sokey_type order_key = split_order_key_regular(hash_key); - node_ptr prev = prepare_bucket(hash_key); - __TBB_ASSERT(prev != nullptr, "Invalid head node"); - - auto search_result = search_after(prev, order_key, key); - - if (search_result.second) { - return internal_insert_return_type{ nullptr, search_result.first, false }; - } - - value_node_ptr new_node = create_insert_node(order_key); - node_ptr curr = search_result.first; - - while (!try_insert(prev, new_node, curr)) { - search_result = search_after(prev, order_key, key); - if (search_result.second) { - return internal_insert_return_type{ new_node, search_result.first, false }; - } - curr = search_result.first; - } - - auto sz = my_size.fetch_add(1); - adjust_table_size(sz + 1, my_bucket_count.load(std::memory_order_acquire)); - return internal_insert_return_type{ nullptr, static_cast<value_node_ptr>(new_node), true }; - } - - // Searches the node with the key, equivalent to key with requested order key after the node prev - // Returns the existing node and true if the node is already in the list - // Returns the first node with the order key, greater than requested and false if the node is not presented in the list - std::pair<value_node_ptr, bool> search_after( node_ptr& prev, sokey_type order_key, const key_type& key ) { - // NOTE: static_cast<value_node_ptr>(curr) should be done only after we would ensure - // that the node is not a dummy node - - node_ptr curr = prev->next(); - - while (curr != nullptr && (curr->order_key() < order_key || - (curr->order_key() == order_key && !my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)))) - { - prev = curr; - curr = curr->next(); - } - - if (curr != nullptr && curr->order_key() == order_key && !allow_multimapping) { - return { static_cast<value_node_ptr>(curr), true }; - } - return { static_cast<value_node_ptr>(curr), false }; - } - - void adjust_table_size( size_type total_elements, size_type current_size ) { - // Grow the table by a factor of 2 if possible and needed - if ( (float(total_elements) / float(current_size)) > my_max_load_factor ) { - // Double the size of the hash only if size hash not changed in between loads - my_bucket_count.compare_exchange_strong(current_size, 2u * current_size); - } - } - - node_ptr insert_dummy_node( node_ptr parent_dummy_node, sokey_type order_key ) { - node_ptr prev_node = parent_dummy_node; - - node_ptr dummy_node = create_dummy_node(order_key); - node_ptr next_node; - - do { - next_node = prev_node->next(); - // Move forward through the list while the order key is less than requested - while (next_node != nullptr && next_node->order_key() < order_key) { - prev_node = next_node; - next_node = next_node->next(); - } - - if (next_node != nullptr && next_node->order_key() == order_key) { - // Another dummy node with the same order key was inserted by another thread - // Destroy 
the node and exit - destroy_node(dummy_node); - return next_node; - } - } while (!try_insert(prev_node, dummy_node, next_node)); - - return dummy_node; - } - - // Try to insert a node between prev_node and expected next - // If the next is not equal to expected next - return false - static bool try_insert( node_ptr prev_node, node_ptr new_node, node_ptr current_next_node ) { - new_node->set_next(current_next_node); - return prev_node->try_set_next(current_next_node, new_node); - } - - // Returns the bucket, associated with the hash_key - node_ptr prepare_bucket( sokey_type hash_key ) { - size_type bucket = hash_key % my_bucket_count.load(std::memory_order_acquire); - return get_bucket(bucket); - } - - // Initialize the corresponding bucket if it is not initialized - node_ptr get_bucket( size_type bucket_index ) { - if (my_segments[bucket_index].load(std::memory_order_acquire) == nullptr) { - init_bucket(bucket_index); - } - return my_segments[bucket_index].load(std::memory_order_acquire); - } - - void init_bucket( size_type bucket ) { - if (bucket == 0) { - // Atomicaly store the first bucket into my_head - node_ptr disabled = nullptr; - my_segments[0].compare_exchange_strong(disabled, &my_head); - return; - } - - size_type parent_bucket = get_parent(bucket); - - while (my_segments[parent_bucket].load(std::memory_order_acquire) == nullptr) { - // Initialize all of the parent buckets - init_bucket(parent_bucket); - } - - __TBB_ASSERT(my_segments[parent_bucket].load(std::memory_order_acquire) != nullptr, "Parent bucket should be initialized"); - node_ptr parent = my_segments[parent_bucket].load(std::memory_order_acquire); - - // Insert dummy node into the list - node_ptr dummy_node = insert_dummy_node(parent, split_order_key_dummy(bucket)); - // TODO: consider returning pair<node_ptr, bool> to avoid store operation if the bucket was stored by an other thread - // or move store to insert_dummy_node - // Add dummy_node into the segment table - my_segments[bucket].store(dummy_node, std::memory_order_release); - } - - node_ptr create_dummy_node( sokey_type order_key ) { - node_allocator_type dummy_node_allocator(my_segments.get_allocator()); - node_ptr dummy_node = node_allocator_traits::allocate(dummy_node_allocator, 1); - node_allocator_traits::construct(dummy_node_allocator, dummy_node, order_key); - return dummy_node; - } - - template <typename... Args> - value_node_ptr create_node( sokey_type order_key, Args&&... 
args ) { - value_node_allocator_type value_node_allocator(my_segments.get_allocator()); - // Allocate memory for the value_node - value_node_ptr new_node = value_node_allocator_traits::allocate(value_node_allocator, 1); - // Construct the node - value_node_allocator_traits::construct(value_node_allocator, new_node, order_key); - - // try_call API is not convenient here due to broken - // variadic capture on GCC 4.8.5 - auto value_guard = make_raii_guard([&] { - value_node_allocator_traits::destroy(value_node_allocator, new_node); - value_node_allocator_traits::deallocate(value_node_allocator, new_node, 1); - }); - - // Construct the value in the node - value_node_allocator_traits::construct(value_node_allocator, new_node->storage(), std::forward<Args>(args)...); - value_guard.dismiss(); - return new_node; - } - - value_node_ptr first_value_node( node_ptr first_node ) const { - while (first_node != nullptr && first_node->is_dummy()) { - first_node = first_node->next(); - } - return static_cast<value_node_ptr>(first_node); - } - - // Unsafe method, which removes the node from the list and returns the next node - node_ptr internal_erase( value_node_ptr node_to_erase ) { - __TBB_ASSERT(node_to_erase != nullptr, "Invalid iterator for erase"); - node_ptr next_node = node_to_erase->next(); - internal_extract(node_to_erase); - destroy_node(node_to_erase); - return next_node; - } - - template <typename K> - size_type internal_erase_by_key( const K& key ) { - // TODO: consider reimplementation without equal_range - it is not effective to perform lookup over a bucket - // for each unsafe_erase call - auto eq_range = equal_range(key); - size_type erased_count = 0; - - for (auto it = eq_range.first; it != eq_range.second;) { - it = unsafe_erase(it); - ++erased_count; - } - return erased_count; - } - - // Unsafe method, which extracts the node from the list - void internal_extract( value_node_ptr node_to_extract ) { - const key_type& key = traits_type::get_key(node_to_extract->value()); - sokey_type hash_key = sokey_type(my_hash_compare(key)); - - node_ptr prev_node = prepare_bucket(hash_key); - - for (node_ptr node = prev_node->next(); node != nullptr; prev_node = node, node = node->next()) { - if (node == node_to_extract) { - unlink_node(prev_node, node, node_to_extract->next()); - my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - return; - } - __TBB_ASSERT(node->order_key() <= node_to_extract->order_key(), - "node, which is going to be extracted should be presented in the list"); - } - } - -protected: - template <typename SourceType> - void internal_merge( SourceType&& source ) { - static_assert(std::is_same<node_type, typename std::decay<SourceType>::type::node_type>::value, - "Incompatible containers cannot be merged"); - - for (node_ptr source_prev = &source.my_head; source_prev->next() != nullptr;) { - if (!source_prev->next()->is_dummy()) { - value_node_ptr curr = static_cast<value_node_ptr>(source_prev->next()); - // If the multimapping is allowed, or the key is not presented - // in the *this container - extract the node from the list - if (allow_multimapping || !contains(traits_type::get_key(curr->value()))) { - node_ptr next_node = curr->next(); - source.unlink_node(source_prev, curr, next_node); - - // Remember the old order key - sokey_type old_order_key = curr->order_key(); - - // Node handle with curr cannot be used directly in insert call, because - // the destructor of node_type will destroy curr - node_type curr_node = 
node_handle_accessor::construct<node_type>(curr); - - // If the insertion fails - return ownership of the node to the source - if (!insert(std::move(curr_node)).second) { - __TBB_ASSERT(!allow_multimapping, "Insertion should succeed for multicontainer"); - __TBB_ASSERT(source_prev->next() == next_node, - "Concurrent operations with the source container in merge are prohibited"); - - // Initialize the node with the old order key, because the order key - // can change during the insertion - curr->init(old_order_key); - __TBB_ASSERT(old_order_key >= source_prev->order_key() && - (next_node == nullptr || old_order_key <= next_node->order_key()), - "Wrong nodes order in the source container"); - // Merge is unsafe for source container, so the insertion back can be done without compare_exchange - curr->set_next(next_node); - source_prev->set_next(curr); - source_prev = curr; - node_handle_accessor::deactivate(curr_node); - } else { - source.my_size.fetch_sub(1, std::memory_order_relaxed); - } - } else { - source_prev = curr; - } - } else { - source_prev = source_prev->next(); - } - } - } - -private: - // Unsafe method, which unlinks the node between prev and next - void unlink_node( node_ptr prev_node, node_ptr node_to_unlink, node_ptr next_node ) { - __TBB_ASSERT(prev_node->next() == node_to_unlink && - node_to_unlink->next() == next_node, - "erasing and extracting nodes from the containers are unsafe in concurrent mode"); - prev_node->set_next(next_node); - node_to_unlink->set_next(nullptr); - } - - template <typename K> - value_node_ptr internal_find( const K& key ) { - sokey_type hash_key = sokey_type(my_hash_compare(key)); - sokey_type order_key = split_order_key_regular(hash_key); - - node_ptr curr = prepare_bucket(hash_key); - - while (curr != nullptr) { - if (curr->order_key() > order_key) { - // If the order key is greater than the requested order key, - // the element is not in the hash table - return nullptr; - } else if (curr->order_key() == order_key && - my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { - // The fact that order keys match does not mean that the element is found. - // Key function comparison has to be performed to check whether this is the - // right element. If not, keep searching while order key is the same. 
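The comment block ending here describes the lookup strategy: a matching order key only narrows the search, and the user-supplied equality predicate makes the final call, since distinct keys can share a hash and therefore an order key. A condensed sketch of that loop over a simplified node list (illustrative types; the dummy flag and plain == stand in for the order-key low bit and my_hash_compare's key equality):

#include <cstdint>

struct FindNode {
    std::uint64_t order_key;  // bit-reversed hash; low bit set for value nodes
    int key;                  // payload key (meaningful for value nodes only)
    bool dummy;               // true for bucket-boundary dummy nodes
    FindNode* next;
};

// Walk the split-ordered list starting from the bucket's dummy node.
FindNode* find_in_list(FindNode* bucket_head, std::uint64_t order_key, int key) {
    for (FindNode* curr = bucket_head; curr != nullptr; curr = curr->next) {
        if (curr->order_key > order_key) return nullptr;   // walked past the slot: absent
        if (curr->order_key == order_key && !curr->dummy && curr->key == key)
            return curr;                                   // hash matched and keys compare equal
    }
    return nullptr;
}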
- return static_cast<value_node_ptr>(curr); - } - curr = curr->next(); - } - - return nullptr; - } - - template <typename K> - std::pair<value_node_ptr, value_node_ptr> internal_equal_range( const K& key ) { - sokey_type hash_key = sokey_type(my_hash_compare(key)); - sokey_type order_key = split_order_key_regular(hash_key); - - node_ptr curr = prepare_bucket(hash_key); - - while (curr != nullptr) { - if (curr->order_key() > order_key) { - // If the order key is greater than the requested order key, - // the element is not in the hash table - return std::make_pair(nullptr, nullptr); - } else if (curr->order_key() == order_key && - my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { - value_node_ptr first = static_cast<value_node_ptr>(curr); - node_ptr last = first; - do { - last = last->next(); - } while (allow_multimapping && last != nullptr && !last->is_dummy() && - my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(last)->value()), key)); - return std::make_pair(first, first_value_node(last)); - } - curr = curr->next(); - } - return {nullptr, nullptr}; - } - - template <typename K> - size_type internal_count( const K& key ) const { - if (allow_multimapping) { - // TODO: consider reimplementing the internal_equal_range with elements counting to avoid std::distance - auto eq_range = equal_range(key); - return std::distance(eq_range.first, eq_range.second); - } else { - return contains(key) ? 1 : 0; - } - } - - void internal_copy( const concurrent_unordered_base& other ) { - node_ptr last_node = &my_head; - my_segments[0].store(&my_head, std::memory_order_relaxed); - - for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { - node_ptr new_node; - if (!node->is_dummy()) { - // The node in the right table contains a value - new_node = create_node(node->order_key(), static_cast<value_node_ptr>(node)->value()); - } else { - // The node in the right table is a dummy node - new_node = create_dummy_node(node->order_key()); - my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); - } - - last_node->set_next(new_node); - last_node = new_node; - } - } - - void internal_move( concurrent_unordered_base&& other ) { - node_ptr last_node = &my_head; - my_segments[0].store(&my_head, std::memory_order_relaxed); - - for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { - node_ptr new_node; - if (!node->is_dummy()) { - // The node in the right table contains a value - new_node = create_node(node->order_key(), std::move(static_cast<value_node_ptr>(node)->value())); - } else { - // TODO: do we need to destroy a dummy node in the right container? 
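Lookup, equal_range, and insertion all derive their position in the list from a split order key: the hash is bit-reversed, then the low bit is set for real elements (split_order_key_regular, defined near the end of this class) and cleared for bucket dummy nodes (split_order_key_dummy), so each bucket's dummy sorts just before the elements hashed into it. A small sketch of that scheme with a naive reverse_bits stand-in (tbb::detail::reverse_bits itself is not shown in this header):

#include <cstdint>

// Naive full-width bit reversal of a 64-bit key; stands in for
// tbb::detail::reverse_bits.
inline std::uint64_t reverse_bits64(std::uint64_t x) {
    std::uint64_t r = 0;
    for (int i = 0; i < 64; ++i) {
        r = (r << 1) | (x & 1);
        x >>= 1;
    }
    return r;
}

// Regular elements: reversed hash with the low bit set.
inline std::uint64_t order_key_regular(std::uint64_t hash) {
    return reverse_bits64(hash) | 0x1;
}

// Bucket dummy nodes: reversed bucket index with the low bit cleared.
inline std::uint64_t order_key_dummy(std::uint64_t bucket) {
    return reverse_bits64(bucket) & ~std::uint64_t(0x1);
}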
- // The node in the right table is a dummy_node - new_node = create_dummy_node(node->order_key()); - my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); - } - - last_node->set_next(new_node); - last_node = new_node; - } - } - - void move_content( concurrent_unordered_base&& other ) { - // NOTE: allocators should be equal - my_head.set_next(other.my_head.next()); - other.my_head.set_next(nullptr); - my_segments[0].store(&my_head, std::memory_order_relaxed); - - other.my_bucket_count.store(initial_bucket_count, std::memory_order_relaxed); - other.my_max_load_factor = initial_max_load_factor; - other.my_size.store(0, std::memory_order_relaxed); - } - - void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type&, - /*is_always_equal = */std::true_type ) { - // Allocators are always equal - no need to compare for equality - move_content(std::move(other)); - } - - void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type& alloc, - /*is_always_equal = */std::false_type ) { - // Allocators are not always equal - if (alloc == other.my_segments.get_allocator()) { - move_content(std::move(other)); - } else { - try_call( [&] { - internal_move(std::move(other)); - } ).on_exception( [&] { - clear(); - }); - } - } - - // Move assigns the hash table to other is any instances of allocator_type are always equal - // or propagate_on_container_move_assignment is true - void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::true_type ) { - move_content(std::move(other)); - } - - // Move assigns the hash table to other is any instances of allocator_type are not always equal - // and propagate_on_container_move_assignment is false - void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::false_type ) { - if (my_segments.get_allocator() == other.my_segments.get_allocator()) { - move_content(std::move(other)); - } else { - // TODO: guards for exceptions - internal_move(std::move(other)); - } - } - - void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::true_type ) { - internal_swap_fields(other); - } - - void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::false_type ) { - __TBB_ASSERT(my_segments.get_allocator() == other.my_segments.get_allocator(), - "Swapping with unequal allocators is not allowed"); - internal_swap_fields(other); - } - - void internal_swap_fields( concurrent_unordered_base& other ) { - node_ptr first_node = my_head.next(); - my_head.set_next(other.my_head.next()); - other.my_head.set_next(first_node); - - size_type current_size = my_size.load(std::memory_order_relaxed); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_size.store(current_size, std::memory_order_relaxed); - - size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); - my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); - other.my_bucket_count.store(bucket_count, std::memory_order_relaxed); - - using std::swap; - swap(my_max_load_factor, other.my_max_load_factor); - swap(my_hash_compare, other.my_hash_compare); - my_segments.swap(other.my_segments); - - // swap() method from segment table swaps all of the segments including the first segment - // We should restore it to my_head. 
Without it the first segment of the container will point - // to other.my_head. - my_segments[0].store(&my_head, std::memory_order_relaxed); - other.my_segments[0].store(&other.my_head, std::memory_order_relaxed); - } - - // A regular order key has its original hash value reversed and the last bit set - static constexpr sokey_type split_order_key_regular( sokey_type hash ) { - return reverse_bits(hash) | 0x1; - } - - // A dummy order key has its original hash value reversed and the last bit unset - static constexpr sokey_type split_order_key_dummy( sokey_type hash ) { - return reverse_bits(hash) & ~sokey_type(0x1); - } - - size_type get_parent( size_type bucket ) const { - // Unset bucket's most significant turned-on bit - __TBB_ASSERT(bucket != 0, "Unable to get_parent of the bucket 0"); - size_type msb = tbb::detail::log2(bucket); - return bucket & ~(size_type(1) << msb); - } - - size_type get_next_bucket_index( size_type bucket ) const { - size_type bits = tbb::detail::log2(my_bucket_count.load(std::memory_order_relaxed)); - size_type reversed_next = reverse_n_bits(bucket, bits) + 1; - return reverse_n_bits(reversed_next, bits); - } - - std::atomic<size_type> my_size; - std::atomic<size_type> my_bucket_count; - float my_max_load_factor; - hash_compare_type my_hash_compare; - - list_node_type my_head; // Head node for split ordered list - unordered_segment_table my_segments; // Segment table of pointers to nodes - - template <typename Container, typename Value> - friend class solist_iterator; - - template <typename OtherTraits> - friend class concurrent_unordered_base; -}; // class concurrent_unordered_base - -template <typename Traits> -bool operator==( const concurrent_unordered_base<Traits>& lhs, - const concurrent_unordered_base<Traits>& rhs ) { - if (&lhs == &rhs) { return true; } - if (lhs.size() != rhs.size()) { return false; } - -#if _MSC_VER - // Passing "unchecked" iterators to std::permutation with 3 parameters - // causes compiler warnings. - // The workaround is to use overload with 4 parameters, which is - // available since C++14 - minimally supported version on MSVC - return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); -#else - return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin()); -#endif -} - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template <typename Traits> -bool operator!=( const concurrent_unordered_base<Traits>& lhs, - const concurrent_unordered_base<Traits>& rhs ) { - return !(lhs == rhs); -} -#endif - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(pop) // warning 4127 is back -#endif - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__concurrent_unordered_base_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__concurrent_unordered_base_H +#define __TBB_detail__concurrent_unordered_base_H + +#if !defined(__TBB_concurrent_unordered_map_H) && !defined(__TBB_concurrent_unordered_set_H) +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_range_common.h" +#include "_containers_helpers.h" +#include "_segment_table.h" +#include "_hash_compare.h" +#include "_allocator_traits.h" +#include "_node_handle.h" +#include "_assert.h" +#include "_utils.h" +#include "_exception.h" +#include <iterator> +#include <utility> +#include <functional> +#include <initializer_list> +#include <atomic> +#include <type_traits> +#include <memory> +#include <algorithm> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Traits> +class concurrent_unordered_base; + +template<typename Container, typename Value> +class solist_iterator { +private: + using node_ptr = typename Container::value_node_ptr; + template <typename T, typename Allocator> + friend class split_ordered_list; + template<typename M, typename V> + friend class solist_iterator; + template <typename Traits> + friend class concurrent_unordered_base; + template<typename M, typename T, typename U> + friend bool operator==( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); + template<typename M, typename T, typename U> + friend bool operator!=( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); +public: + using value_type = Value; + using difference_type = typename Container::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::forward_iterator_tag; + + solist_iterator() : my_node_ptr(nullptr) {} + solist_iterator( const solist_iterator<Container, typename Container::value_type>& other ) + : my_node_ptr(other.my_node_ptr) {} + + solist_iterator& operator=( const solist_iterator<Container, typename Container::value_type>& other ) { + my_node_ptr = other.my_node_ptr; + return *this; + } + + reference operator*() const { + return my_node_ptr->value(); + } + + pointer operator->() const { + return my_node_ptr->storage(); + } + + solist_iterator& operator++() { + auto next_node = my_node_ptr->next(); + while(next_node && next_node->is_dummy()) { + next_node = next_node->next(); + } + my_node_ptr = static_cast<node_ptr>(next_node); + return *this; + } + + solist_iterator operator++(int) { + solist_iterator tmp = *this; + ++*this; + return tmp; + } + +private: + solist_iterator( node_ptr pnode ) : my_node_ptr(pnode) {} + + node_ptr get_node_ptr() const { return my_node_ptr; } + + node_ptr my_node_ptr; +}; + +template<typename Solist, typename T, typename U> +bool operator==( const solist_iterator<Solist, T>& i, const solist_iterator<Solist, U>& j ) { + return i.my_node_ptr == j.my_node_ptr; +} + +template<typename Solist, typename T, typename U> +bool operator!=( const solist_iterator<Solist, T>& i, const solist_iterator<Solist, U>& j ) { + return i.my_node_ptr != j.my_node_ptr; +} + +template <typename SokeyType> +class list_node { +public: + using node_ptr = list_node*; + using sokey_type = SokeyType; + + list_node(sokey_type key) : my_next(nullptr), my_order_key(key) {} + + void init( sokey_type key ) { + my_order_key = key; + } + + sokey_type order_key() const { + return my_order_key; + } + + bool is_dummy() { + 
// The last bit of order key is unset for dummy nodes + return (my_order_key & 0x1) == 0; + } + + node_ptr next() const { + return my_next.load(std::memory_order_acquire); + } + + void set_next( node_ptr next_node ) { + my_next.store(next_node, std::memory_order_release); + } + + bool try_set_next( node_ptr expected_next, node_ptr new_next ) { + return my_next.compare_exchange_strong(expected_next, new_next); + } + +private: + std::atomic<node_ptr> my_next; + sokey_type my_order_key; +}; // class list_node + +template <typename ValueType, typename SokeyType> +class value_node : public list_node<SokeyType> +{ +public: + using base_type = list_node<SokeyType>; + using sokey_type = typename base_type::sokey_type; + using value_type = ValueType; + + value_node( sokey_type ord_key ) : base_type(ord_key) {} + ~value_node() {} + value_type* storage() { + return reinterpret_cast<value_type*>(&my_value); + } + + value_type& value() { + return *storage(); + } + +private: + using aligned_storage_type = typename std::aligned_storage<sizeof(value_type)>::type; + aligned_storage_type my_value; +}; // class value_node + +template <typename Traits> +class concurrent_unordered_base { + using self_type = concurrent_unordered_base<Traits>; + using traits_type = Traits; + using hash_compare_type = typename traits_type::hash_compare_type; + class unordered_segment_table; +public: + using value_type = typename traits_type::value_type; + using key_type = typename traits_type::key_type; + using allocator_type = typename traits_type::allocator_type; + +private: + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + // TODO: check assert conditions for different C++ standards + static_assert(std::is_same<typename allocator_traits_type::value_type, value_type>::value, + "value_type of the container must be the same as its allocator"); + using sokey_type = std::size_t; + +public: + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + using iterator = solist_iterator<self_type, value_type>; + using const_iterator = solist_iterator<self_type, const value_type>; + using local_iterator = iterator; + using const_local_iterator = const_iterator; + + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using hasher = typename hash_compare_type::hasher; + using key_equal = typename hash_compare_type::key_equal; + +private: + using list_node_type = list_node<sokey_type>; + using value_node_type = value_node<value_type, sokey_type>; + using node_ptr = list_node_type*; + using value_node_ptr = value_node_type*; + + using value_node_allocator_type = typename allocator_traits_type::template rebind_alloc<value_node_type>; + using node_allocator_type = typename allocator_traits_type::template rebind_alloc<list_node_type>; + + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + using value_node_allocator_traits = tbb::detail::allocator_traits<value_node_allocator_type>; + + static constexpr size_type round_up_to_power_of_two( size_type bucket_count ) { + return size_type(1) << size_type(tbb::detail::log2(uintptr_t(bucket_count == 0 ? 
1 : bucket_count) * 2 - 1)); + } + + template <typename T> + using is_transparent = dependent_bool<has_transparent_key_equal<key_type, hasher, key_equal>, T>; +public: + using node_type = node_handle<key_type, value_type, value_node_type, allocator_type>; + + explicit concurrent_unordered_base( size_type bucket_count, const hasher& hash = hasher(), + const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) + : my_size(0), + my_bucket_count(round_up_to_power_of_two(bucket_count)), + my_max_load_factor(float(initial_max_load_factor)), + my_hash_compare(hash, equal), + my_head(sokey_type(0)), + my_segments(alloc) {} + + concurrent_unordered_base() : concurrent_unordered_base(initial_bucket_count) {} + + concurrent_unordered_base( size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(bucket_count, hasher(), key_equal(), alloc) {} + + concurrent_unordered_base( size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(bucket_count, hash, key_equal(), alloc) {} + + explicit concurrent_unordered_base( const allocator_type& alloc ) + : concurrent_unordered_base(initial_bucket_count, hasher(), key_equal(), alloc) {} + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count = initial_bucket_count, const hasher& hash = hasher(), + const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) + : concurrent_unordered_base(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(first, last, bucket_count, hasher(), key_equal(), alloc) {} + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(first, last, bucket_count, hash, key_equal(), alloc) {} + + concurrent_unordered_base( const concurrent_unordered_base& other ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(other.my_max_load_factor), + my_hash_compare(other.my_hash_compare), + my_head(other.my_head.order_key()), + my_segments(other.my_segments) + { + try_call( [&] { + internal_copy(other); + } ).on_exception( [&] { + clear(); + }); + } + + concurrent_unordered_base( const concurrent_unordered_base& other, const allocator_type& alloc ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(other.my_max_load_factor), + my_hash_compare(other.my_hash_compare), + my_head(other.my_head.order_key()), + my_segments(other.my_segments, alloc) + { + try_call( [&] { + internal_copy(other); + } ).on_exception( [&] { + clear(); + }); + } + + concurrent_unordered_base( concurrent_unordered_base&& other ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(std::move(other.my_max_load_factor)), + my_hash_compare(std::move(other.my_hash_compare)), + my_head(other.my_head.order_key()), + my_segments(std::move(other.my_segments)) + { + move_content(std::move(other)); + } + + concurrent_unordered_base( 
concurrent_unordered_base&& other, const allocator_type& alloc ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(std::move(other.my_max_load_factor)), + my_hash_compare(std::move(other.my_hash_compare)), + my_head(other.my_head.order_key()), + my_segments(std::move(other.my_segments), alloc) + { + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_construct_with_allocator(std::move(other), alloc, is_always_equal()); + } + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count = initial_bucket_count, + const hasher& hash = hasher(), const key_equal& equal = key_equal(), + const allocator_type& alloc = allocator_type() ) + : concurrent_unordered_base(init.begin(), init.end(), bucket_count, hash, equal, alloc) {} + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(init, bucket_count, hasher(), key_equal(), alloc) {} + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(init, bucket_count, hash, key_equal(), alloc) {} + + ~concurrent_unordered_base() { + internal_clear(); + } + + concurrent_unordered_base& operator=( const concurrent_unordered_base& other ) { + if (this != &other) { + clear(); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_max_load_factor = other.my_max_load_factor; + my_hash_compare = other.my_hash_compare; + my_segments = other.my_segments; + internal_copy(other); // TODO: guards for exceptions? 
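// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// The copy constructors above guard internal_copy() with an internal
// try_call(...).on_exception(...) helper so that a failed copy leaves the table
// empty; the TODO on the preceding line observes that copy assignment has no such
// guard yet. The same rollback idiom in plain C++, for a hypothetical container
// type `Table` with copy_from() and clear() (both names are assumptions):
template <typename Table>
void copy_with_rollback( Table& dst, const Table& src ) {
    try {
        dst.copy_from(src);   // may throw while allocating/constructing nodes
    } catch (...) {
        dst.clear();          // roll back to an empty, destructible state
        throw;                // propagate the original exception to the caller
    }
}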
+ } + return *this; + } + + concurrent_unordered_base& operator=( concurrent_unordered_base&& other ) noexcept(unordered_segment_table::is_noexcept_assignment) { + if (this != &other) { + clear(); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_max_load_factor = std::move(other.my_max_load_factor); + my_hash_compare = std::move(other.my_hash_compare); + my_segments = std::move(other.my_segments); + + using pocma_type = typename allocator_traits_type::propagate_on_container_move_assignment; + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); + } + return *this; + } + + concurrent_unordered_base& operator=( std::initializer_list<value_type> init ) { + clear(); + insert(init); + return *this; + } + + void swap( concurrent_unordered_base& other ) noexcept(unordered_segment_table::is_noexcept_swap) { + if (this != &other) { + using pocs_type = typename allocator_traits_type::propagate_on_container_swap; + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); + } + } + + allocator_type get_allocator() const noexcept { return my_segments.get_allocator(); } + + iterator begin() noexcept { return iterator(first_value_node(&my_head)); } + const_iterator begin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } + const_iterator cbegin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } + + iterator end() noexcept { return iterator(nullptr); } + const_iterator end() const noexcept { return const_iterator(nullptr); } + const_iterator cend() const noexcept { return const_iterator(nullptr); } + + __TBB_nodiscard bool empty() const noexcept { return size() == 0; } + size_type size() const noexcept { return my_size.load(std::memory_order_relaxed); } + size_type max_size() const noexcept { return allocator_traits_type::max_size(get_allocator()); } + + void clear() noexcept { + internal_clear(); + } + + std::pair<iterator, bool> insert( const value_type& value ) { + return internal_insert_value(value); + } + + std::pair<iterator, bool> insert( value_type&& value ) { + return internal_insert_value(std::move(value)); + } + + iterator insert( const_iterator, const value_type& value ) { + // Ignore hint + return insert(value).first; + } + + iterator insert( const_iterator, value_type&& value ) { + // Ignore hint + return insert(std::move(value)).first; + } + + template <typename InputIterator> + void insert( InputIterator first, InputIterator last ) { + for (; first != last; ++first) { + insert(*first); + } + } + + void insert( std::initializer_list<value_type> init ) { + insert(init.begin(), init.end()); + } + + std::pair<iterator, bool> insert( node_type&& nh ) { + if (!nh.empty()) { + value_node_ptr insert_node = node_handle_accessor::get_node_ptr(nh); + auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { + insert_node->init(order_key); + return insert_node; + }; + auto insert_result = internal_insert(insert_node->value(), init_node); + if (insert_result.inserted) { + // If the insertion succeeded - set node handle to the empty state + __TBB_ASSERT(insert_result.remaining_node == nullptr, + "internal_insert_node should not return the 
remaining node if the insertion succeeded"); + node_handle_accessor::deactivate(nh); + } + return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; + } + return {end(), false}; + } + + iterator insert( const_iterator, node_type&& nh ) { + // Ignore hint + return insert(std::move(nh)).first; + } + + template <typename... Args> + std::pair<iterator, bool> emplace( Args&&... args ) { + // Create a node with temporary order_key 0, which will be reinitialize + // in internal_insert after the hash calculation + value_node_ptr insert_node = create_node(0, std::forward<Args>(args)...); + + auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { + insert_node->init(order_key); + return insert_node; + }; + + auto insert_result = internal_insert(insert_node->value(), init_node); + + if (!insert_result.inserted) { + // If the insertion failed - destroy the node which was created + insert_node->init(split_order_key_regular(1)); + destroy_node(insert_node); + } + + return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; + } + + template <typename... Args> + iterator emplace_hint( const_iterator, Args&&... args ) { + // Ignore hint + return emplace(std::forward<Args>(args)...).first; + } + + iterator unsafe_erase( const_iterator pos ) { + return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); + } + + iterator unsafe_erase( iterator pos ) { + return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); + } + + iterator unsafe_erase( const_iterator first, const_iterator last ) { + while(first != last) { + first = unsafe_erase(first); + } + return iterator(first.get_node_ptr()); + } + + size_type unsafe_erase( const key_type& key ) { + return internal_erase_by_key(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + size_type>::type unsafe_erase( const K& key ) + { + return internal_erase_by_key(key); + } + + node_type unsafe_extract( const_iterator pos ) { + internal_extract(pos.get_node_ptr()); + return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); + } + + node_type unsafe_extract( iterator pos ) { + internal_extract(pos.get_node_ptr()); + return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); + } + + node_type unsafe_extract( const key_type& key ) { + iterator item = find(key); + return item == end() ? node_type() : unsafe_extract(item); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + node_type>::type unsafe_extract( const K& key ) + { + iterator item = find(key); + return item == end() ? node_type() : unsafe_extract(item); + } + + // Lookup functions + iterator find( const key_type& key ) { + value_node_ptr result = internal_find(key); + return result == nullptr ? end() : iterator(result); + } + + const_iterator find( const key_type& key ) const { + value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); + return result == nullptr ? end() : const_iterator(result); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { + value_node_ptr result = internal_find(key); + return result == nullptr ? 
end() : iterator(result); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { + value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); + return result == nullptr ? end() : const_iterator(result); + } + + std::pair<iterator, iterator> equal_range( const key_type& key ) { + auto result = internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + std::pair<const_iterator, const_iterator> equal_range( const key_type& key ) const { + auto result = const_cast<self_type*>(this)->internal_equal_range(key); + return std::make_pair(const_iterator(result.first), const_iterator(result.second)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { + auto result = internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { + auto result = const_cast<self_type*>(this)->internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + size_type count( const key_type& key ) const { + return internal_count(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { + return internal_count(key); + } + + bool contains( const key_type& key ) const { + return find(key) != end(); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { + return find(key) != end(); + } + + // Bucket interface + local_iterator unsafe_begin( size_type n ) { + return local_iterator(first_value_node(get_bucket(n))); + } + + const_local_iterator unsafe_begin( size_type n ) const { + auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); + return const_local_iterator(bucket_begin); + } + + const_local_iterator unsafe_cbegin( size_type n ) const { + auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); + return const_local_iterator(bucket_begin); + } + + local_iterator unsafe_end( size_type n ) { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : local_iterator(nullptr); + } + + const_local_iterator unsafe_end( size_type n ) const { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); + } + + const_local_iterator unsafe_cend( size_type n ) const { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? 
unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); + } + + size_type unsafe_bucket_count() const { return my_bucket_count.load(std::memory_order_relaxed); } + + size_type unsafe_max_bucket_count() const { + return max_size(); + } + + size_type unsafe_bucket_size( size_type n ) const { + return size_type(std::distance(unsafe_begin(n), unsafe_end(n))); + } + + size_type unsafe_bucket( const key_type& key ) const { + return my_hash_compare(key) % my_bucket_count.load(std::memory_order_relaxed); + } + + // Hash policy + float load_factor() const { + return float(size() / float(my_bucket_count.load(std::memory_order_acquire))); + } + + float max_load_factor() const { return my_max_load_factor; } + + void max_load_factor( float mlf ) { + if (mlf != mlf || mlf < 0) { + tbb::detail::throw_exception(exception_id::invalid_load_factor); + } + my_max_load_factor = mlf; + } // TODO: unsafe? + + void rehash( size_type bucket_count ) { + size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); + if (current_bucket_count < bucket_count) { + // TODO: do we need do-while here? + my_bucket_count.compare_exchange_strong(current_bucket_count, round_up_to_power_of_two(bucket_count)); + } + } + + void reserve( size_type elements_count ) { + size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); + size_type necessary_bucket_count = current_bucket_count; + + do { + // TODO: Log2 seems useful here + while (necessary_bucket_count * max_load_factor() < elements_count) { + necessary_bucket_count <<= 1; + } + } while (current_bucket_count >= necessary_bucket_count || + !my_bucket_count.compare_exchange_strong(current_bucket_count, necessary_bucket_count)); + } + + // Observers + hasher hash_function() const { return my_hash_compare.hash_function(); } + key_equal key_eq() const { return my_hash_compare.key_eq(); } + + class const_range_type { + private: + const concurrent_unordered_base& my_instance; + node_ptr my_begin_node; // may be node* const + node_ptr my_end_node; + mutable node_ptr my_midpoint_node; + public: + using size_type = typename concurrent_unordered_base::size_type; + using value_type = typename concurrent_unordered_base::value_type; + using reference = typename concurrent_unordered_base::reference; + using difference_type = typename concurrent_unordered_base::difference_type; + using iterator = typename concurrent_unordered_base::const_iterator; + + bool empty() const { return my_begin_node == my_end_node; } + + bool is_divisible() const { + return my_midpoint_node != my_end_node; + } + + size_type grainsize() const { return 1; } + + const_range_type( const_range_type& range, split ) + : my_instance(range.my_instance), + my_begin_node(range.my_midpoint_node), + my_end_node(range.my_end_node) + { + range.my_end_node = my_begin_node; + __TBB_ASSERT(!empty(), "Splitting despite the range is not divisible"); + __TBB_ASSERT(!range.empty(), "Splitting despite the range is not divisible"); + set_midpoint(); + range.set_midpoint(); + } + + iterator begin() const { return iterator(my_instance.first_value_node(my_begin_node)); } + iterator end() const { return iterator(my_instance.first_value_node(my_end_node)); } + + const_range_type( const concurrent_unordered_base& table ) + : my_instance(table), my_begin_node(const_cast<node_ptr>(&table.my_head)), my_end_node(nullptr) + { + set_midpoint(); + } + private: + void set_midpoint() const { + if (my_begin_node == my_end_node) { + my_midpoint_node = my_end_node; + } else { + sokey_type 
invalid_key = ~sokey_type(0); + sokey_type begin_key = my_begin_node != nullptr ? my_begin_node->order_key() : invalid_key; + sokey_type end_key = my_end_node != nullptr ? my_end_node->order_key() : invalid_key; + + size_type mid_bucket = reverse_bits(begin_key + (end_key - begin_key) / 2) % + my_instance.my_bucket_count.load(std::memory_order_relaxed); + while( my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed) == nullptr) { + mid_bucket = my_instance.get_parent(mid_bucket); + } + if (reverse_bits(mid_bucket) > begin_key) { + // Found a dummy node between begin and end + my_midpoint_node = my_instance.first_value_node( + my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed)); + } else { + // Didn't find a dummy node between begin and end + my_midpoint_node = my_end_node; + } + } + } + }; // class const_range_type + + class range_type : public const_range_type { + public: + using iterator = typename concurrent_unordered_base::iterator; + using const_range_type::const_range_type; + + iterator begin() const { return iterator(const_range_type::begin().get_node_ptr()); } + iterator end() const { return iterator(const_range_type::end().get_node_ptr()); } + }; // class range_type + + // Parallel iteration + range_type range() { + return range_type(*this); + } + + const_range_type range() const { + return const_range_type(*this); + } +protected: + static constexpr bool allow_multimapping = traits_type::allow_multimapping; + +private: + static constexpr size_type initial_bucket_count = 8; + static constexpr float initial_max_load_factor = 4; // TODO: consider 1? + static constexpr size_type pointers_per_embedded_table = sizeof(size_type) * 8 - 1; + + class unordered_segment_table + : public segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table> + { + using self_type = unordered_segment_table; + using atomic_node_ptr = std::atomic<node_ptr>; + using base_type = segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table>; + using segment_type = typename base_type::segment_type; + using base_allocator_type = typename base_type::allocator_type; + + using segment_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_node_ptr>; + using segment_allocator_traits = tbb::detail::allocator_traits<segment_allocator_type>; + public: + // Segment table for unordered containers should not be extended in the wait- free implementation + static constexpr bool allow_table_extending = false; + static constexpr bool is_noexcept_assignment = std::is_nothrow_move_assignable<hasher>::value && + std::is_nothrow_move_assignable<key_equal>::value && + segment_allocator_traits::is_always_equal::value; + static constexpr bool is_noexcept_swap = tbb::detail::is_nothrow_swappable<hasher>::value && + tbb::detail::is_nothrow_swappable<key_equal>::value && + segment_allocator_traits::is_always_equal::value; + + // TODO: using base_type::base_type is not compiling on Windows and Intel Compiler - investigate + unordered_segment_table( const base_allocator_type& alloc = base_allocator_type() ) + : base_type(alloc) {} + + unordered_segment_table( const unordered_segment_table& ) = default; + + unordered_segment_table( const unordered_segment_table& other, const base_allocator_type& alloc ) + : base_type(other, alloc) {} + + unordered_segment_table( unordered_segment_table&& ) = default; + + unordered_segment_table( unordered_segment_table&& other, const base_allocator_type& alloc ) + 
: base_type(std::move(other), alloc) {} + + unordered_segment_table& operator=( const unordered_segment_table& ) = default; + + unordered_segment_table& operator=( unordered_segment_table&& ) = default; + + segment_type create_segment( typename base_type::segment_table_type, typename base_type::segment_index_type segment_index, size_type ) { + segment_allocator_type alloc(this->get_allocator()); + size_type seg_size = this->segment_size(segment_index); + segment_type new_segment = segment_allocator_traits::allocate(alloc, seg_size); + for (size_type i = 0; i != seg_size; ++i) { + segment_allocator_traits::construct(alloc, new_segment + i, nullptr); + } + return new_segment; + } + + // deallocate_segment is required by the segment_table base class, but + // in unordered, it is also necessary to call the destructor during deallocation + void deallocate_segment( segment_type address, size_type index ) { + destroy_segment(address, index); + } + + void destroy_segment( segment_type address, size_type index ) { + segment_allocator_type alloc(this->get_allocator()); + for (size_type i = 0; i != this->segment_size(index); ++i) { + segment_allocator_traits::destroy(alloc, address + i); + } + segment_allocator_traits::deallocate(alloc, address, this->segment_size(index)); + } + + + void copy_segment( size_type index, segment_type, segment_type to ) { + if (index == 0) { + // The first element in the first segment is embedded into the table (my_head) + // so the first pointer should not be stored here + // It would be stored during move ctor/assignment operation + to[1].store(nullptr, std::memory_order_relaxed); + } else { + for (size_type i = 0; i != this->segment_size(index); ++i) { + to[i].store(nullptr, std::memory_order_relaxed); + } + } + } + + void move_segment( size_type index, segment_type from, segment_type to ) { + if (index == 0) { + // The first element in the first segment is embedded into the table (my_head) + // so the first pointer should not be stored here + // It would be stored during move ctor/assignment operation + to[1].store(from[1].load(std::memory_order_relaxed), std::memory_order_relaxed); + } else { + for (size_type i = 0; i != this->segment_size(index); ++i) { + to[i].store(from[i].load(std::memory_order_relaxed), std::memory_order_relaxed); + from[i].store(nullptr, std::memory_order_relaxed); + } + } + } + + // allocate_long_table is required by the segment_table base class, but unused for unordered containers + typename base_type::segment_table_type allocate_long_table( const typename base_type::atomic_segment*, size_type ) { + __TBB_ASSERT(false, "This method should never been called"); + // TableType is a pointer + return nullptr; + } + + // destroy_elements is required by the segment_table base class, but unused for unordered containers + // this function call but do nothing + void destroy_elements() {} + }; // struct unordered_segment_table + + void internal_clear() { + // TODO: consider usefulness of two versions of clear() - with dummy nodes deallocation and without it + node_ptr next = my_head.next(); + node_ptr curr = next; + + my_head.set_next(nullptr); + + while (curr != nullptr) { + next = curr->next(); + destroy_node(curr); + curr = next; + } + + my_size.store(0, std::memory_order_relaxed); + my_segments.clear(); + } + + void destroy_node( node_ptr node ) { + if (node->is_dummy()) { + node_allocator_type dummy_node_allocator(my_segments.get_allocator()); + // Destroy the node + node_allocator_traits::destroy(dummy_node_allocator, node); + // Deallocate the 
memory + node_allocator_traits::deallocate(dummy_node_allocator, node, 1); + } else { + value_node_ptr val_node = static_cast<value_node_ptr>(node); + value_node_allocator_type value_node_allocator(my_segments.get_allocator()); + // Destroy the value + value_node_allocator_traits::destroy(value_node_allocator, val_node->storage()); + // Destroy the node + value_node_allocator_traits::destroy(value_node_allocator, val_node); + // Deallocate the memory + value_node_allocator_traits::deallocate(value_node_allocator, val_node, 1); + } + } + + struct internal_insert_return_type { + // If the insertion failed - the remaining_node points to the node, which was failed to insert + // This node can be allocated in process of insertion + value_node_ptr remaining_node; + // If the insertion failed - node_with_equal_key points to the node in the list with the + // key, equivalent to the inserted, otherwise it points to the node, which was inserted. + value_node_ptr node_with_equal_key; + // Insertion status + // NOTE: if it is true - remaining_node should be nullptr + bool inserted; + }; // struct internal_insert_return_type + + // Inserts the value into the split ordered list + template <typename ValueType> + std::pair<iterator, bool> internal_insert_value( ValueType&& value ) { + + auto create_value_node = [&value, this]( sokey_type order_key )->value_node_ptr { + return create_node(order_key, std::forward<ValueType>(value)); + }; + + auto insert_result = internal_insert(value, create_value_node); + + if (insert_result.remaining_node != nullptr) { + // If the insertion fails - destroy the node which was failed to insert if it exist + __TBB_ASSERT(!insert_result.inserted, + "remaining_node should be nullptr if the node was successfully inserted"); + destroy_node(insert_result.remaining_node); + } + + return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; + } + + // Inserts the node into the split ordered list + // Creates a node using the specified callback after the place for insertion was found + // Returns internal_insert_return_type object, where: + // - If the insertion succeeded: + // - remaining_node is nullptr + // - node_with_equal_key point to the inserted node + // - inserted is true + // - If the insertion failed: + // - remaining_node points to the node, that was failed to insert if it was created. 
+ // nullptr if the node was not created, because the requested key was already + // presented in the list + // - node_with_equal_key point to the element in the list with the key, equivalent to + // to the requested key + // - inserted is false + template <typename ValueType, typename CreateInsertNode> + internal_insert_return_type internal_insert( ValueType&& value, CreateInsertNode create_insert_node ) { + static_assert(std::is_same<typename std::decay<ValueType>::type, value_type>::value, + "Incorrect type in internal_insert"); + const key_type& key = traits_type::get_key(value); + sokey_type hash_key = sokey_type(my_hash_compare(key)); + + sokey_type order_key = split_order_key_regular(hash_key); + node_ptr prev = prepare_bucket(hash_key); + __TBB_ASSERT(prev != nullptr, "Invalid head node"); + + auto search_result = search_after(prev, order_key, key); + + if (search_result.second) { + return internal_insert_return_type{ nullptr, search_result.first, false }; + } + + value_node_ptr new_node = create_insert_node(order_key); + node_ptr curr = search_result.first; + + while (!try_insert(prev, new_node, curr)) { + search_result = search_after(prev, order_key, key); + if (search_result.second) { + return internal_insert_return_type{ new_node, search_result.first, false }; + } + curr = search_result.first; + } + + auto sz = my_size.fetch_add(1); + adjust_table_size(sz + 1, my_bucket_count.load(std::memory_order_acquire)); + return internal_insert_return_type{ nullptr, static_cast<value_node_ptr>(new_node), true }; + } + + // Searches the node with the key, equivalent to key with requested order key after the node prev + // Returns the existing node and true if the node is already in the list + // Returns the first node with the order key, greater than requested and false if the node is not presented in the list + std::pair<value_node_ptr, bool> search_after( node_ptr& prev, sokey_type order_key, const key_type& key ) { + // NOTE: static_cast<value_node_ptr>(curr) should be done only after we would ensure + // that the node is not a dummy node + + node_ptr curr = prev->next(); + + while (curr != nullptr && (curr->order_key() < order_key || + (curr->order_key() == order_key && !my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)))) + { + prev = curr; + curr = curr->next(); + } + + if (curr != nullptr && curr->order_key() == order_key && !allow_multimapping) { + return { static_cast<value_node_ptr>(curr), true }; + } + return { static_cast<value_node_ptr>(curr), false }; + } + + void adjust_table_size( size_type total_elements, size_type current_size ) { + // Grow the table by a factor of 2 if possible and needed + if ( (float(total_elements) / float(current_size)) > my_max_load_factor ) { + // Double the size of the hash only if size hash not changed in between loads + my_bucket_count.compare_exchange_strong(current_size, 2u * current_size); + } + } + + node_ptr insert_dummy_node( node_ptr parent_dummy_node, sokey_type order_key ) { + node_ptr prev_node = parent_dummy_node; + + node_ptr dummy_node = create_dummy_node(order_key); + node_ptr next_node; + + do { + next_node = prev_node->next(); + // Move forward through the list while the order key is less than requested + while (next_node != nullptr && next_node->order_key() < order_key) { + prev_node = next_node; + next_node = next_node->next(); + } + + if (next_node != nullptr && next_node->order_key() == order_key) { + // Another dummy node with the same order key was inserted by another thread + // Destroy 
the node and exit + destroy_node(dummy_node); + return next_node; + } + } while (!try_insert(prev_node, dummy_node, next_node)); + + return dummy_node; + } + + // Try to insert a node between prev_node and expected next + // If the next is not equal to expected next - return false + static bool try_insert( node_ptr prev_node, node_ptr new_node, node_ptr current_next_node ) { + new_node->set_next(current_next_node); + return prev_node->try_set_next(current_next_node, new_node); + } + + // Returns the bucket, associated with the hash_key + node_ptr prepare_bucket( sokey_type hash_key ) { + size_type bucket = hash_key % my_bucket_count.load(std::memory_order_acquire); + return get_bucket(bucket); + } + + // Initialize the corresponding bucket if it is not initialized + node_ptr get_bucket( size_type bucket_index ) { + if (my_segments[bucket_index].load(std::memory_order_acquire) == nullptr) { + init_bucket(bucket_index); + } + return my_segments[bucket_index].load(std::memory_order_acquire); + } + + void init_bucket( size_type bucket ) { + if (bucket == 0) { + // Atomicaly store the first bucket into my_head + node_ptr disabled = nullptr; + my_segments[0].compare_exchange_strong(disabled, &my_head); + return; + } + + size_type parent_bucket = get_parent(bucket); + + while (my_segments[parent_bucket].load(std::memory_order_acquire) == nullptr) { + // Initialize all of the parent buckets + init_bucket(parent_bucket); + } + + __TBB_ASSERT(my_segments[parent_bucket].load(std::memory_order_acquire) != nullptr, "Parent bucket should be initialized"); + node_ptr parent = my_segments[parent_bucket].load(std::memory_order_acquire); + + // Insert dummy node into the list + node_ptr dummy_node = insert_dummy_node(parent, split_order_key_dummy(bucket)); + // TODO: consider returning pair<node_ptr, bool> to avoid store operation if the bucket was stored by an other thread + // or move store to insert_dummy_node + // Add dummy_node into the segment table + my_segments[bucket].store(dummy_node, std::memory_order_release); + } + + node_ptr create_dummy_node( sokey_type order_key ) { + node_allocator_type dummy_node_allocator(my_segments.get_allocator()); + node_ptr dummy_node = node_allocator_traits::allocate(dummy_node_allocator, 1); + node_allocator_traits::construct(dummy_node_allocator, dummy_node, order_key); + return dummy_node; + } + + template <typename... Args> + value_node_ptr create_node( sokey_type order_key, Args&&... 
args ) { + value_node_allocator_type value_node_allocator(my_segments.get_allocator()); + // Allocate memory for the value_node + value_node_ptr new_node = value_node_allocator_traits::allocate(value_node_allocator, 1); + // Construct the node + value_node_allocator_traits::construct(value_node_allocator, new_node, order_key); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + value_node_allocator_traits::destroy(value_node_allocator, new_node); + value_node_allocator_traits::deallocate(value_node_allocator, new_node, 1); + }); + + // Construct the value in the node + value_node_allocator_traits::construct(value_node_allocator, new_node->storage(), std::forward<Args>(args)...); + value_guard.dismiss(); + return new_node; + } + + value_node_ptr first_value_node( node_ptr first_node ) const { + while (first_node != nullptr && first_node->is_dummy()) { + first_node = first_node->next(); + } + return static_cast<value_node_ptr>(first_node); + } + + // Unsafe method, which removes the node from the list and returns the next node + node_ptr internal_erase( value_node_ptr node_to_erase ) { + __TBB_ASSERT(node_to_erase != nullptr, "Invalid iterator for erase"); + node_ptr next_node = node_to_erase->next(); + internal_extract(node_to_erase); + destroy_node(node_to_erase); + return next_node; + } + + template <typename K> + size_type internal_erase_by_key( const K& key ) { + // TODO: consider reimplementation without equal_range - it is not effective to perform lookup over a bucket + // for each unsafe_erase call + auto eq_range = equal_range(key); + size_type erased_count = 0; + + for (auto it = eq_range.first; it != eq_range.second;) { + it = unsafe_erase(it); + ++erased_count; + } + return erased_count; + } + + // Unsafe method, which extracts the node from the list + void internal_extract( value_node_ptr node_to_extract ) { + const key_type& key = traits_type::get_key(node_to_extract->value()); + sokey_type hash_key = sokey_type(my_hash_compare(key)); + + node_ptr prev_node = prepare_bucket(hash_key); + + for (node_ptr node = prev_node->next(); node != nullptr; prev_node = node, node = node->next()) { + if (node == node_to_extract) { + unlink_node(prev_node, node, node_to_extract->next()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + return; + } + __TBB_ASSERT(node->order_key() <= node_to_extract->order_key(), + "node, which is going to be extracted should be presented in the list"); + } + } + +protected: + template <typename SourceType> + void internal_merge( SourceType&& source ) { + static_assert(std::is_same<node_type, typename std::decay<SourceType>::type::node_type>::value, + "Incompatible containers cannot be merged"); + + for (node_ptr source_prev = &source.my_head; source_prev->next() != nullptr;) { + if (!source_prev->next()->is_dummy()) { + value_node_ptr curr = static_cast<value_node_ptr>(source_prev->next()); + // If the multimapping is allowed, or the key is not presented + // in the *this container - extract the node from the list + if (allow_multimapping || !contains(traits_type::get_key(curr->value()))) { + node_ptr next_node = curr->next(); + source.unlink_node(source_prev, curr, next_node); + + // Remember the old order key + sokey_type old_order_key = curr->order_key(); + + // Node handle with curr cannot be used directly in insert call, because + // the destructor of node_type will destroy curr + node_type curr_node = 
node_handle_accessor::construct<node_type>(curr); + + // If the insertion fails - return ownership of the node to the source + if (!insert(std::move(curr_node)).second) { + __TBB_ASSERT(!allow_multimapping, "Insertion should succeed for multicontainer"); + __TBB_ASSERT(source_prev->next() == next_node, + "Concurrent operations with the source container in merge are prohibited"); + + // Initialize the node with the old order key, because the order key + // can change during the insertion + curr->init(old_order_key); + __TBB_ASSERT(old_order_key >= source_prev->order_key() && + (next_node == nullptr || old_order_key <= next_node->order_key()), + "Wrong nodes order in the source container"); + // Merge is unsafe for source container, so the insertion back can be done without compare_exchange + curr->set_next(next_node); + source_prev->set_next(curr); + source_prev = curr; + node_handle_accessor::deactivate(curr_node); + } else { + source.my_size.fetch_sub(1, std::memory_order_relaxed); + } + } else { + source_prev = curr; + } + } else { + source_prev = source_prev->next(); + } + } + } + +private: + // Unsafe method, which unlinks the node between prev and next + void unlink_node( node_ptr prev_node, node_ptr node_to_unlink, node_ptr next_node ) { + __TBB_ASSERT(prev_node->next() == node_to_unlink && + node_to_unlink->next() == next_node, + "erasing and extracting nodes from the containers are unsafe in concurrent mode"); + prev_node->set_next(next_node); + node_to_unlink->set_next(nullptr); + } + + template <typename K> + value_node_ptr internal_find( const K& key ) { + sokey_type hash_key = sokey_type(my_hash_compare(key)); + sokey_type order_key = split_order_key_regular(hash_key); + + node_ptr curr = prepare_bucket(hash_key); + + while (curr != nullptr) { + if (curr->order_key() > order_key) { + // If the order key is greater than the requested order key, + // the element is not in the hash table + return nullptr; + } else if (curr->order_key() == order_key && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { + // The fact that order keys match does not mean that the element is found. + // Key function comparison has to be performed to check whether this is the + // right element. If not, keep searching while order key is the same. 
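// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// Background for the comment above: a value node's order key is its hash with the
// bits reversed and the lowest bit set, while a dummy (bucket) node uses a reversed
// bucket index with the lowest bit cleared (see split_order_key_regular and
// split_order_key_dummy further down in this hunk). Two distinct keys whose hashes
// collide therefore share an order key, which is why key_equal is still consulted.
// Standalone sketch, using a plain loop in place of TBB's reverse_bits helper:
#include <cstddef>

inline std::size_t reverse_bits_sketch( std::size_t x ) {
    std::size_t r = 0;
    for (unsigned i = 0; i < sizeof(x) * 8; ++i) {
        r = (r << 1) | (x & 1);   // shift the lowest bit of x into r, highest first
        x >>= 1;
    }
    return r;
}

inline std::size_t order_key_regular( std::size_t hash )  { return reverse_bits_sketch(hash) | 0x1; }                  // value node
inline std::size_t order_key_dummy( std::size_t bucket )  { return reverse_bits_sketch(bucket) & ~std::size_t(0x1); }  // bucket (dummy) node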
+ return static_cast<value_node_ptr>(curr); + } + curr = curr->next(); + } + + return nullptr; + } + + template <typename K> + std::pair<value_node_ptr, value_node_ptr> internal_equal_range( const K& key ) { + sokey_type hash_key = sokey_type(my_hash_compare(key)); + sokey_type order_key = split_order_key_regular(hash_key); + + node_ptr curr = prepare_bucket(hash_key); + + while (curr != nullptr) { + if (curr->order_key() > order_key) { + // If the order key is greater than the requested order key, + // the element is not in the hash table + return std::make_pair(nullptr, nullptr); + } else if (curr->order_key() == order_key && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { + value_node_ptr first = static_cast<value_node_ptr>(curr); + node_ptr last = first; + do { + last = last->next(); + } while (allow_multimapping && last != nullptr && !last->is_dummy() && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(last)->value()), key)); + return std::make_pair(first, first_value_node(last)); + } + curr = curr->next(); + } + return {nullptr, nullptr}; + } + + template <typename K> + size_type internal_count( const K& key ) const { + if (allow_multimapping) { + // TODO: consider reimplementing the internal_equal_range with elements counting to avoid std::distance + auto eq_range = equal_range(key); + return std::distance(eq_range.first, eq_range.second); + } else { + return contains(key) ? 1 : 0; + } + } + + void internal_copy( const concurrent_unordered_base& other ) { + node_ptr last_node = &my_head; + my_segments[0].store(&my_head, std::memory_order_relaxed); + + for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { + node_ptr new_node; + if (!node->is_dummy()) { + // The node in the right table contains a value + new_node = create_node(node->order_key(), static_cast<value_node_ptr>(node)->value()); + } else { + // The node in the right table is a dummy node + new_node = create_dummy_node(node->order_key()); + my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); + } + + last_node->set_next(new_node); + last_node = new_node; + } + } + + void internal_move( concurrent_unordered_base&& other ) { + node_ptr last_node = &my_head; + my_segments[0].store(&my_head, std::memory_order_relaxed); + + for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { + node_ptr new_node; + if (!node->is_dummy()) { + // The node in the right table contains a value + new_node = create_node(node->order_key(), std::move(static_cast<value_node_ptr>(node)->value())); + } else { + // TODO: do we need to destroy a dummy node in the right container? 
+ // The node in the right table is a dummy_node + new_node = create_dummy_node(node->order_key()); + my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); + } + + last_node->set_next(new_node); + last_node = new_node; + } + } + + void move_content( concurrent_unordered_base&& other ) { + // NOTE: allocators should be equal + my_head.set_next(other.my_head.next()); + other.my_head.set_next(nullptr); + my_segments[0].store(&my_head, std::memory_order_relaxed); + + other.my_bucket_count.store(initial_bucket_count, std::memory_order_relaxed); + other.my_max_load_factor = initial_max_load_factor; + other.my_size.store(0, std::memory_order_relaxed); + } + + void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type&, + /*is_always_equal = */std::true_type ) { + // Allocators are always equal - no need to compare for equality + move_content(std::move(other)); + } + + void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type& alloc, + /*is_always_equal = */std::false_type ) { + // Allocators are not always equal + if (alloc == other.my_segments.get_allocator()) { + move_content(std::move(other)); + } else { + try_call( [&] { + internal_move(std::move(other)); + } ).on_exception( [&] { + clear(); + }); + } + } + + // Move assigns the hash table to other is any instances of allocator_type are always equal + // or propagate_on_container_move_assignment is true + void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::true_type ) { + move_content(std::move(other)); + } + + // Move assigns the hash table to other is any instances of allocator_type are not always equal + // and propagate_on_container_move_assignment is false + void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::false_type ) { + if (my_segments.get_allocator() == other.my_segments.get_allocator()) { + move_content(std::move(other)); + } else { + // TODO: guards for exceptions + internal_move(std::move(other)); + } + } + + void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::true_type ) { + internal_swap_fields(other); + } + + void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::false_type ) { + __TBB_ASSERT(my_segments.get_allocator() == other.my_segments.get_allocator(), + "Swapping with unequal allocators is not allowed"); + internal_swap_fields(other); + } + + void internal_swap_fields( concurrent_unordered_base& other ) { + node_ptr first_node = my_head.next(); + my_head.set_next(other.my_head.next()); + other.my_head.set_next(first_node); + + size_type current_size = my_size.load(std::memory_order_relaxed); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(current_size, std::memory_order_relaxed); + + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_bucket_count.store(bucket_count, std::memory_order_relaxed); + + using std::swap; + swap(my_max_load_factor, other.my_max_load_factor); + swap(my_hash_compare, other.my_hash_compare); + my_segments.swap(other.my_segments); + + // swap() method from segment table swaps all of the segments including the first segment + // We should restore it to my_head. 
Without it the first segment of the container will point + // to other.my_head. + my_segments[0].store(&my_head, std::memory_order_relaxed); + other.my_segments[0].store(&other.my_head, std::memory_order_relaxed); + } + + // A regular order key has its original hash value reversed and the last bit set + static constexpr sokey_type split_order_key_regular( sokey_type hash ) { + return reverse_bits(hash) | 0x1; + } + + // A dummy order key has its original hash value reversed and the last bit unset + static constexpr sokey_type split_order_key_dummy( sokey_type hash ) { + return reverse_bits(hash) & ~sokey_type(0x1); + } + + size_type get_parent( size_type bucket ) const { + // Unset bucket's most significant turned-on bit + __TBB_ASSERT(bucket != 0, "Unable to get_parent of the bucket 0"); + size_type msb = tbb::detail::log2(bucket); + return bucket & ~(size_type(1) << msb); + } + + size_type get_next_bucket_index( size_type bucket ) const { + size_type bits = tbb::detail::log2(my_bucket_count.load(std::memory_order_relaxed)); + size_type reversed_next = reverse_n_bits(bucket, bits) + 1; + return reverse_n_bits(reversed_next, bits); + } + + std::atomic<size_type> my_size; + std::atomic<size_type> my_bucket_count; + float my_max_load_factor; + hash_compare_type my_hash_compare; + + list_node_type my_head; // Head node for split ordered list + unordered_segment_table my_segments; // Segment table of pointers to nodes + + template <typename Container, typename Value> + friend class solist_iterator; + + template <typename OtherTraits> + friend class concurrent_unordered_base; +}; // class concurrent_unordered_base + +template <typename Traits> +bool operator==( const concurrent_unordered_base<Traits>& lhs, + const concurrent_unordered_base<Traits>& rhs ) { + if (&lhs == &rhs) { return true; } + if (lhs.size() != rhs.size()) { return false; } + +#if _MSC_VER + // Passing "unchecked" iterators to std::permutation with 3 parameters + // causes compiler warnings. + // The workaround is to use overload with 4 parameters, which is + // available since C++14 - minimally supported version on MSVC + return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +#else + return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin()); +#endif +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Traits> +bool operator!=( const concurrent_unordered_base<Traits>& lhs, + const concurrent_unordered_base<Traits>& rhs ) { + return !(lhs == rhs); +} +#endif + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__concurrent_unordered_base_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h index 251ebb8d82..1f9b0fff13 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h @@ -1,483 +1,483 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
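// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// get_parent() above clears the most significant set bit of a bucket index, so
// every bucket chains toward bucket 0 (e.g. 13 -> 5 -> 1 -> 0); init_bucket()
// earlier in the hunk walks that chain to lazily create parent dummy nodes.
// Standalone sketch, with a simple loop in place of tbb::detail::log2:
#include <cstddef>

inline std::size_t floor_log2_sketch( std::size_t x ) {
    std::size_t r = 0;
    while (x >>= 1) ++r;
    return r;
}

inline std::size_t parent_bucket_sketch( std::size_t bucket ) {
    // precondition: bucket != 0, matching the assertion in get_parent()
    return bucket & ~(std::size_t(1) << floor_log2_sketch(bucket));
}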
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__config_H -#define __TBB_detail__config_H - -/** This header is supposed to contain macro definitions only. - The macros defined here are intended to control such aspects of TBB build as - - presence of compiler features - - compilation modes - - feature sets - - known compiler/platform issues -**/ - -/* Check which standard library we use. */ -#include <cstddef> - -#if _MSC_VER - #define __TBB_EXPORTED_FUNC __cdecl - #define __TBB_EXPORTED_METHOD __thiscall -#else - #define __TBB_EXPORTED_FUNC - #define __TBB_EXPORTED_METHOD -#endif - -#if defined(_MSVC_LANG) - #define __TBB_LANG _MSVC_LANG -#else - #define __TBB_LANG __cplusplus -#endif // _MSVC_LANG - -#define __TBB_CPP14_PRESENT (__TBB_LANG >= 201402L) -#define __TBB_CPP17_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP20_PRESENT (__TBB_LANG >= 201709L) - -#if __INTEL_COMPILER || _MSC_VER - #define __TBB_NOINLINE(decl) __declspec(noinline) decl -#elif __GNUC__ - #define __TBB_NOINLINE(decl) decl __attribute__ ((noinline)) -#else - #define __TBB_NOINLINE(decl) decl -#endif - -#define __TBB_STRING_AUX(x) #x -#define __TBB_STRING(x) __TBB_STRING_AUX(x) - -// Note that when ICC or Clang is in use, __TBB_GCC_VERSION might not fully match -// the actual GCC version on the system. -#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - -/* Check which standard library we use. */ - -// Prior to GCC 7, GNU libstdc++ did not have a convenient version macro. -// Therefore we use different ways to detect its version. -#ifdef TBB_USE_GLIBCXX_VERSION - // The version is explicitly specified in our public TBB_USE_GLIBCXX_VERSION macro. - // Its format should match the __TBB_GCC_VERSION above, e.g. 70301 for libstdc++ coming with GCC 7.3.1. - #define __TBB_GLIBCXX_VERSION TBB_USE_GLIBCXX_VERSION -#elif _GLIBCXX_RELEASE && _GLIBCXX_RELEASE != __GNUC__ - // Reported versions of GCC and libstdc++ do not match; trust the latter - #define __TBB_GLIBCXX_VERSION (_GLIBCXX_RELEASE*10000) -#elif __GLIBCPP__ || __GLIBCXX__ - // The version macro is not defined or matches the GCC version; use __TBB_GCC_VERSION - #define __TBB_GLIBCXX_VERSION __TBB_GCC_VERSION -#endif - -#if __clang__ - // according to clang documentation, version can be vendor specific - #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) -#endif - -/** Macro helpers **/ - -#define __TBB_CONCAT_AUX(A,B) A##B -// The additional level of indirection is needed to expand macros A and B (not to get the AB macro). -// See [cpp.subst] and [cpp.concat] for more details. -#define __TBB_CONCAT(A,B) __TBB_CONCAT_AUX(A,B) -// The IGNORED argument and comma are needed to always have 2 arguments (even when A is empty). -#define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A) -#define __TBB_MACRO_EMPTY 1 - -#if _M_X64 - #define __TBB_W(name) name##64 -#else - #define __TBB_W(name) name -#endif - -/** User controlled TBB features & modes **/ - -#ifndef TBB_USE_DEBUG - /* - There are four cases that are supported: - 1. "_DEBUG is undefined" means "no debug"; - 2. "_DEBUG defined to something that is evaluated to 0" (including "garbage", as per [cpp.cond]) means "no debug"; - 3. "_DEBUG defined to something that is evaluated to a non-zero value" means "debug"; - 4. "_DEBUG defined to nothing (empty)" means "debug". - */ - #ifdef _DEBUG - // Check if _DEBUG is empty. 
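// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// How the emptiness check below works, reduced to a standalone example: macro
// arguments are expanded before substitution, so if DBG is defined to nothing the
// concatenation collapses to EMPTY_TAG, which itself expands to 1; any non-empty
// definition produces a different (undefined) token that evaluates to 0 in #if.
#define CAT_AUX(A, B) A##B
#define EMPTY_TAG 1
#define IS_EMPTY(A, IGNORED) CAT_AUX(EMPTY_TAG, A)

#define DBG                              // defined, but to nothing
#if IS_EMPTY(DBG, IGNORED) == EMPTY_TAG
    #define USE_DEBUG 1                  // taken: DBG is defined and empty
#else
    #define USE_DEBUG DBG
#endif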
- #define __TBB_IS__DEBUG_EMPTY (__TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)==__TBB_MACRO_EMPTY) - #if __TBB_IS__DEBUG_EMPTY - #define TBB_USE_DEBUG 1 - #else - #define TBB_USE_DEBUG _DEBUG - #endif // __TBB_IS__DEBUG_EMPTY - #else - #define TBB_USE_DEBUG 0 - #endif // _DEBUG -#endif // TBB_USE_DEBUG - -#ifndef TBB_USE_ASSERT - #define TBB_USE_ASSERT TBB_USE_DEBUG -#endif // TBB_USE_ASSERT - -#ifndef TBB_USE_PROFILING_TOOLS -#if TBB_USE_DEBUG - #define TBB_USE_PROFILING_TOOLS 2 -#else // TBB_USE_DEBUG - #define TBB_USE_PROFILING_TOOLS 0 -#endif // TBB_USE_DEBUG -#endif // TBB_USE_PROFILING_TOOLS - -// Exceptions support cases -#if !(__EXCEPTIONS || defined(_CPPUNWIND) || __SUNPRO_CC) - #if TBB_USE_EXCEPTIONS - #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. - #elif !defined(TBB_USE_EXCEPTIONS) - #define TBB_USE_EXCEPTIONS 0 - #endif -#elif !defined(TBB_USE_EXCEPTIONS) - #define TBB_USE_EXCEPTIONS 1 -#endif - -/** Preprocessor symbols to determine HW architecture **/ - -#if _WIN32 || _WIN64 - #if defined(_M_X64) || defined(__x86_64__) // the latter for MinGW support - #define __TBB_x86_64 1 - #elif defined(_M_IA64) - #define __TBB_ipf 1 - #elif defined(_M_IX86) || defined(__i386__) // the latter for MinGW support - #define __TBB_x86_32 1 - #else - #define __TBB_generic_arch 1 - #endif -#else /* Assume generic Unix */ - #if __x86_64__ - #define __TBB_x86_64 1 - #elif __ia64__ - #define __TBB_ipf 1 - #elif __i386__||__i386 // __i386 is for Sun OS - #define __TBB_x86_32 1 - #else - #define __TBB_generic_arch 1 - #endif -#endif - -/** Windows API or POSIX API **/ - -#if _WIN32 || _WIN64 - #define __TBB_USE_WINAPI 1 -#else - #define __TBB_USE_POSIX 1 -#endif - -/** Internal TBB features & modes **/ - -/** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/ -#ifndef __TBB_DYNAMIC_LOAD_ENABLED - #define __TBB_DYNAMIC_LOAD_ENABLED 1 -#endif - -/** __TBB_WIN8UI_SUPPORT enables support of Windows* Store Apps and limit a possibility to load - shared libraries at run time only from application container **/ -#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP - #define __TBB_WIN8UI_SUPPORT 1 -#else - #define __TBB_WIN8UI_SUPPORT 0 -#endif - -/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/ -#ifndef __TBB_WEAK_SYMBOLS_PRESENT - #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) -#endif - -/** Presence of compiler features **/ - -#if __clang__ && !__INTEL_COMPILER - #define __TBB_USE_OPTIONAL_RTTI __has_feature(cxx_rtti) -#elif defined(_CPPRTTI) - #define __TBB_USE_OPTIONAL_RTTI 1 -#else - #define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__) -#endif - -/** Library features presence macros **/ - -#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__TBB_LANG >= 201402L) -#define __TBB_CPP17_INVOKE_RESULT_PRESENT (__TBB_LANG >= 201703L) - -// TODO: Remove the condition(__INTEL_COMPILER > 2021) from the __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -// macro when this feature start working correctly on this compiler. 
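// [Editor's note: illustrative sketch only, not part of the TBB sources in this diff.]
// The compiler-specific block below gates C++17 deduction guides per toolchain;
// the clang branch relies on the standard feature-test macro. The same pattern in
// standalone form (the MY_* name is made up for the sketch, the threshold mirrors
// the clang branch below):
#ifdef __cpp_deduction_guides
    #define MY_HAS_DEDUCTION_GUIDES (__cpp_deduction_guides >= 201611L)
#else
    #define MY_HAS_DEDUCTION_GUIDES 0
#endif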
-#if __INTEL_COMPILER && (!_MSC_VER || __INTEL_CXX11_MOVE__) - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__INTEL_COMPILER > 2021 && __TBB_LANG >= 201703L) - #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition -#elif __clang__ - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__has_feature(cxx_variable_templates)) - #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition - #ifdef __cpp_deduction_guides - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201611L) - #else - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT 0 - #endif -#elif __GNUC__ - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L && __TBB_GCC_VERSION >= 50000) - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201606L) - #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 201709L && __TBB_GCC_VERSION >= 100201) -#elif _MSC_VER - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (_MSC_FULL_VER >= 190023918 && (!__INTEL_COMPILER || __INTEL_COMPILER >= 1700)) - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (_MSC_VER >= 1914 && __TBB_LANG >= 201703L && (!__INTEL_COMPILER || __INTEL_COMPILER > 2021)) - #define __TBB_CPP20_CONCEPTS_PRESENT (_MSC_VER >= 1923 && __TBB_LANG >= 202002L) // TODO: INTEL_COMPILER? -#else - #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) - #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__TBB_LANG >= 201703L) - #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 202002L) -#endif - -// GCC4.8 on RHEL7 does not support std::get_new_handler -#define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION) -// GCC4.8 on RHEL7 does not support std::is_trivially_copyable -#define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700 || (__TBB_GLIBCXX_VERSION >= 50000 && __GXX_EXPERIMENTAL_CXX0X__)) - -#define __TBB_CPP17_MEMORY_RESOURCE_PRESENT 0 -#define __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT (_MSC_VER >= 1911) -#define __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP17_IS_SWAPPABLE_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP20_COMPARISONS_PRESENT __TBB_CPP20_PRESENT - -#if (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__APPLE__ && !defined(_musl_)) -#define __TBB_RESUMABLE_TASKS 1 -#else -#define __TBB_RESUMABLE_TASKS 0 -#endif - -/* This macro marks incomplete code or comments describing ideas which are considered for the future. - * See also for plain comment with TODO and FIXME marks for small improvement opportunities. - */ -#define __TBB_TODO 0 - -/* Check which standard library we use. */ -/* __TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed. 
*/ -#if !defined(__TBB_SYMBOL) && !__TBB_CONFIG_PREPROC_ONLY - #include <cstddef> -#endif - -/** Target OS is either iOS* or iOS* simulator **/ -#if __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ - #define __TBB_IOS 1 -#endif - -#if __APPLE__ - #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \ - && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000 - // ICC does not correctly set the macro if -mmacosx-min-version is not specified - #define __TBB_MACOS_TARGET_VERSION (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000)) - #else - #define __TBB_MACOS_TARGET_VERSION __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - #endif -#endif - -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) - #define __TBB_GCC_WARNING_IGNORED_ATTRIBUTES_PRESENT (__TBB_GCC_VERSION >= 60100) -#endif - -#define __TBB_CPP17_FALLTHROUGH_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP17_NODISCARD_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_FALLTHROUGH_PRESENT (__TBB_GCC_VERSION >= 70000 && !__INTEL_COMPILER) - -#if __TBB_CPP17_FALLTHROUGH_PRESENT - #define __TBB_fallthrough [[fallthrough]] -#elif __TBB_FALLTHROUGH_PRESENT - #define __TBB_fallthrough __attribute__ ((fallthrough)) -#else - #define __TBB_fallthrough -#endif - -#if __TBB_CPP17_NODISCARD_PRESENT - #define __TBB_nodiscard [[nodiscard]] -#elif __clang__ || __GNUC__ - #define __TBB_nodiscard __attribute__((warn_unused_result)) -#else - #define __TBB_nodiscard -#endif - -#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \ - || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200) && !__TBB_IOS) - - -#define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || (_MSC_VER>=1700 && !__clang__) || __INTEL_COMPILER>=1300) && !__TBB_DEFINE_MIC && !__ANDROID__) - -#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) && !__ANDROID__) - -/** Internal TBB features & modes **/ - -/** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when - it's necessary to test internal functions not exported from TBB DLLs -**/ -#if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY) - #define __TBB_NO_IMPLICIT_LINKAGE 1 - #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 -#endif - -#if (__TBB_BUILD || __TBBMALLOC_BUILD || __TBBMALLOCPROXY_BUILD || __TBBBIND_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE) - #define __TBB_NO_IMPLICIT_LINKAGE 1 -#endif - -#if _MSC_VER - #if !__TBB_NO_IMPLICIT_LINKAGE - #ifdef _DEBUG - #pragma comment(lib, "tbb12_debug.lib") - #else - #pragma comment(lib, "tbb12.lib") - #endif - #endif -#endif - -#ifndef __TBB_SCHEDULER_OBSERVER - #define __TBB_SCHEDULER_OBSERVER 1 -#endif /* __TBB_SCHEDULER_OBSERVER */ - -#ifndef __TBB_FP_CONTEXT - #define __TBB_FP_CONTEXT 1 -#endif /* __TBB_FP_CONTEXT */ - -#define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official - -#ifndef __TBB_ARENA_OBSERVER - #define __TBB_ARENA_OBSERVER __TBB_SCHEDULER_OBSERVER -#endif /* __TBB_ARENA_OBSERVER */ - -#ifndef __TBB_ARENA_BINDING - #define __TBB_ARENA_BINDING 1 -#endif - -#if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD - #define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1 -#endif - -#if (TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION || __TBB_BUILD) && __TBB_ARENA_BINDING - #define __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT 1 -#endif - -#ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY - #define 
__TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 -#endif - -#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \ - (_WIN32 || _WIN64 || __APPLE__ || (__linux__ && !__ANDROID__)) - #define __TBB_SURVIVE_THREAD_SWITCH 1 -#endif /* __TBB_SURVIVE_THREAD_SWITCH */ - -#ifndef TBB_PREVIEW_FLOW_GRAPH_FEATURES - #define TBB_PREVIEW_FLOW_GRAPH_FEATURES __TBB_CPF_BUILD -#endif - -#ifndef __TBB_DEFAULT_PARTITIONER - #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner -#endif - -#ifndef __TBB_FLOW_TRACE_CODEPTR - #define __TBB_FLOW_TRACE_CODEPTR __TBB_CPF_BUILD -#endif - -// Intel(R) C++ Compiler starts analyzing usages of the deprecated content at the template -// instantiation site, which is too late for suppression of the corresponding messages for internal -// stuff. -#if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) - #if (__TBB_LANG >= 201402L) - #define __TBB_DEPRECATED [[deprecated]] - #define __TBB_DEPRECATED_MSG(msg) [[deprecated(msg)]] - #elif _MSC_VER - #define __TBB_DEPRECATED __declspec(deprecated) - #define __TBB_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) - #elif (__GNUC__ && __TBB_GCC_VERSION >= 40805) || __clang__ - #define __TBB_DEPRECATED __attribute__((deprecated)) - #define __TBB_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) - #endif -#endif // !defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) - -#if !defined(__TBB_DEPRECATED) - #define __TBB_DEPRECATED - #define __TBB_DEPRECATED_MSG(msg) -#elif !defined(__TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES) - // Suppress deprecated messages from self - #define __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES 1 -#endif - -#if defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) && (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) - #define __TBB_DEPRECATED_VERBOSE __TBB_DEPRECATED - #define __TBB_DEPRECATED_VERBOSE_MSG(msg) __TBB_DEPRECATED_MSG(msg) -#else - #define __TBB_DEPRECATED_VERBOSE - #define __TBB_DEPRECATED_VERBOSE_MSG(msg) -#endif // (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) - -#if (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) && !(__TBB_LANG >= 201103L || _MSC_VER >= 1900) - #pragma message("TBB Warning: Support for C++98/03 is deprecated. Please use the compiler that supports C++11 features at least.") -#endif - -#ifdef _VARIADIC_MAX - #define __TBB_VARIADIC_MAX _VARIADIC_MAX -#else - #if _MSC_VER == 1700 - #define __TBB_VARIADIC_MAX 5 // VS11 setting, issue resolved in VS12 - #elif _MSC_VER == 1600 - #define __TBB_VARIADIC_MAX 10 // VS10 setting - #else - #define __TBB_VARIADIC_MAX 15 - #endif -#endif - -/** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by - the bugs in compilers, standard or OS specific libraries. They should be - removed as soon as the corresponding bugs are fixed or the buggy OS/compiler - versions go out of the support list. -**/ - -// Some STL containers not support allocator traits in old GCC versions -#if __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION <= 50301 - #define TBB_ALLOCATOR_TRAITS_BROKEN 1 -#endif - -// GCC 4.8 C++ standard library implements std::this_thread::yield as no-op. 
-#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 - #define __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN 1 -#endif - -/** End of __TBB_XXX_BROKEN macro section **/ - -#if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER) - // A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702) - #define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1 -#endif - -// Many OS versions (Android 4.0.[0-3] for example) need workaround for dlopen to avoid non-recursive loader lock hang -// Setting the workaround for all compile targets ($APP_PLATFORM) below Android 4.4 (android-19) -#if __ANDROID__ - #include <android/api-level.h> -#endif - -#define __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING (TBB_PREVIEW_FLOW_GRAPH_FEATURES) - -#ifndef __TBB_PREVIEW_CRITICAL_TASKS -#define __TBB_PREVIEW_CRITICAL_TASKS 1 -#endif - -#ifndef __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -#define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES) -#endif - - -#if !defined(__APPLE__) || !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED > 101500 - #define __TBB_ALIGNAS_AVAILABLE 1 -#else - #define __TBB_ALIGNAS_AVAILABLE 0 -#endif - -#endif // __TBB_detail__config_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__config_H +#define __TBB_detail__config_H + +/** This header is supposed to contain macro definitions only. + The macros defined here are intended to control such aspects of TBB build as + - presence of compiler features + - compilation modes + - feature sets + - known compiler/platform issues +**/ + +/* Check which standard library we use. */ +#include <cstddef> + +#if _MSC_VER + #define __TBB_EXPORTED_FUNC __cdecl + #define __TBB_EXPORTED_METHOD __thiscall +#else + #define __TBB_EXPORTED_FUNC + #define __TBB_EXPORTED_METHOD +#endif + +#if defined(_MSVC_LANG) + #define __TBB_LANG _MSVC_LANG +#else + #define __TBB_LANG __cplusplus +#endif // _MSVC_LANG + +#define __TBB_CPP14_PRESENT (__TBB_LANG >= 201402L) +#define __TBB_CPP17_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP20_PRESENT (__TBB_LANG >= 201709L) + +#if __INTEL_COMPILER || _MSC_VER + #define __TBB_NOINLINE(decl) __declspec(noinline) decl +#elif __GNUC__ + #define __TBB_NOINLINE(decl) decl __attribute__ ((noinline)) +#else + #define __TBB_NOINLINE(decl) decl +#endif + +#define __TBB_STRING_AUX(x) #x +#define __TBB_STRING(x) __TBB_STRING_AUX(x) + +// Note that when ICC or Clang is in use, __TBB_GCC_VERSION might not fully match +// the actual GCC version on the system. +#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + +/* Check which standard library we use. */ + +// Prior to GCC 7, GNU libstdc++ did not have a convenient version macro. +// Therefore we use different ways to detect its version. +#ifdef TBB_USE_GLIBCXX_VERSION + // The version is explicitly specified in our public TBB_USE_GLIBCXX_VERSION macro. + // Its format should match the __TBB_GCC_VERSION above, e.g. 
70301 for libstdc++ coming with GCC 7.3.1. + #define __TBB_GLIBCXX_VERSION TBB_USE_GLIBCXX_VERSION +#elif _GLIBCXX_RELEASE && _GLIBCXX_RELEASE != __GNUC__ + // Reported versions of GCC and libstdc++ do not match; trust the latter + #define __TBB_GLIBCXX_VERSION (_GLIBCXX_RELEASE*10000) +#elif __GLIBCPP__ || __GLIBCXX__ + // The version macro is not defined or matches the GCC version; use __TBB_GCC_VERSION + #define __TBB_GLIBCXX_VERSION __TBB_GCC_VERSION +#endif + +#if __clang__ + // according to clang documentation, version can be vendor specific + #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif + +/** Macro helpers **/ + +#define __TBB_CONCAT_AUX(A,B) A##B +// The additional level of indirection is needed to expand macros A and B (not to get the AB macro). +// See [cpp.subst] and [cpp.concat] for more details. +#define __TBB_CONCAT(A,B) __TBB_CONCAT_AUX(A,B) +// The IGNORED argument and comma are needed to always have 2 arguments (even when A is empty). +#define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A) +#define __TBB_MACRO_EMPTY 1 + +#if _M_X64 + #define __TBB_W(name) name##64 +#else + #define __TBB_W(name) name +#endif + +/** User controlled TBB features & modes **/ + +#ifndef TBB_USE_DEBUG + /* + There are four cases that are supported: + 1. "_DEBUG is undefined" means "no debug"; + 2. "_DEBUG defined to something that is evaluated to 0" (including "garbage", as per [cpp.cond]) means "no debug"; + 3. "_DEBUG defined to something that is evaluated to a non-zero value" means "debug"; + 4. "_DEBUG defined to nothing (empty)" means "debug". + */ + #ifdef _DEBUG + // Check if _DEBUG is empty. + #define __TBB_IS__DEBUG_EMPTY (__TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)==__TBB_MACRO_EMPTY) + #if __TBB_IS__DEBUG_EMPTY + #define TBB_USE_DEBUG 1 + #else + #define TBB_USE_DEBUG _DEBUG + #endif // __TBB_IS__DEBUG_EMPTY + #else + #define TBB_USE_DEBUG 0 + #endif // _DEBUG +#endif // TBB_USE_DEBUG + +#ifndef TBB_USE_ASSERT + #define TBB_USE_ASSERT TBB_USE_DEBUG +#endif // TBB_USE_ASSERT + +#ifndef TBB_USE_PROFILING_TOOLS +#if TBB_USE_DEBUG + #define TBB_USE_PROFILING_TOOLS 2 +#else // TBB_USE_DEBUG + #define TBB_USE_PROFILING_TOOLS 0 +#endif // TBB_USE_DEBUG +#endif // TBB_USE_PROFILING_TOOLS + +// Exceptions support cases +#if !(__EXCEPTIONS || defined(_CPPUNWIND) || __SUNPRO_CC) + #if TBB_USE_EXCEPTIONS + #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. 
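The __TBB_IS_MACRO_EMPTY probe above is what lets TBB_USE_DEBUG distinguish "#define _DEBUG" (empty) from "#define _DEBUG 0". A minimal standalone sketch of the same token-pasting trick; the PROBE_* and DBG names are invented for the example and are not part of TBB:

    #include <cstdio>

    // Same idea as __TBB_CONCAT_AUX / __TBB_IS_MACRO_EMPTY above: paste a known
    // prefix directly against the tested macro's value. If the value is empty,
    // the paste yields the prefix token itself, which expands to 1.
    #define PROBE_CAT_AUX(a, b) a##b
    #define PROBE_EMPTY 1
    #define PROBE_IS_EMPTY(m) PROBE_CAT_AUX(PROBE_EMPTY, m)

    #define DBG                 // defined, but to nothing ("debug requested")

    int main() {
    #if PROBE_IS_EMPTY(DBG) == PROBE_EMPTY
        std::puts("DBG is defined to nothing -> treat as enabled");
    #else
        std::puts("DBG carries a value -> use that value");
    #endif
    }

Because PROBE_CAT_AUX applies ## directly, neither operand is pre-expanded, which is exactly why the two-level __TBB_CONCAT helper is bypassed in the emptiness check above.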
+ #elif !defined(TBB_USE_EXCEPTIONS) + #define TBB_USE_EXCEPTIONS 0 + #endif +#elif !defined(TBB_USE_EXCEPTIONS) + #define TBB_USE_EXCEPTIONS 1 +#endif + +/** Preprocessor symbols to determine HW architecture **/ + +#if _WIN32 || _WIN64 + #if defined(_M_X64) || defined(__x86_64__) // the latter for MinGW support + #define __TBB_x86_64 1 + #elif defined(_M_IA64) + #define __TBB_ipf 1 + #elif defined(_M_IX86) || defined(__i386__) // the latter for MinGW support + #define __TBB_x86_32 1 + #else + #define __TBB_generic_arch 1 + #endif +#else /* Assume generic Unix */ + #if __x86_64__ + #define __TBB_x86_64 1 + #elif __ia64__ + #define __TBB_ipf 1 + #elif __i386__||__i386 // __i386 is for Sun OS + #define __TBB_x86_32 1 + #else + #define __TBB_generic_arch 1 + #endif +#endif + +/** Windows API or POSIX API **/ + +#if _WIN32 || _WIN64 + #define __TBB_USE_WINAPI 1 +#else + #define __TBB_USE_POSIX 1 +#endif + +/** Internal TBB features & modes **/ + +/** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/ +#ifndef __TBB_DYNAMIC_LOAD_ENABLED + #define __TBB_DYNAMIC_LOAD_ENABLED 1 +#endif + +/** __TBB_WIN8UI_SUPPORT enables support of Windows* Store Apps and limit a possibility to load + shared libraries at run time only from application container **/ +#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP + #define __TBB_WIN8UI_SUPPORT 1 +#else + #define __TBB_WIN8UI_SUPPORT 0 +#endif + +/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/ +#ifndef __TBB_WEAK_SYMBOLS_PRESENT + #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) +#endif + +/** Presence of compiler features **/ + +#if __clang__ && !__INTEL_COMPILER + #define __TBB_USE_OPTIONAL_RTTI __has_feature(cxx_rtti) +#elif defined(_CPPRTTI) + #define __TBB_USE_OPTIONAL_RTTI 1 +#else + #define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__) +#endif + +/** Library features presence macros **/ + +#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__TBB_LANG >= 201402L) +#define __TBB_CPP17_INVOKE_RESULT_PRESENT (__TBB_LANG >= 201703L) + +// TODO: Remove the condition(__INTEL_COMPILER > 2021) from the __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// macro when this feature start working correctly on this compiler. 
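TBB_USE_EXCEPTIONS, as derived above, defaults to 1 only when the toolchain reports exception support (__EXCEPTIONS, _CPPUNWIND). A hedged sketch of client code keying off the same macro; the function and the abort-on-error policy are the example's own choices, not TBB's:

    #include <oneapi/tbb/parallel_for.h>
    #include <cstddef>
    #include <cstdlib>
    #include <exception>
    #include <vector>

    // Scales every element; the try/catch is only compiled when exception
    // support is configured, mirroring the TBB_USE_EXCEPTIONS logic above.
    void scale_all(std::vector<double>& v, double factor) {
    #if TBB_USE_EXCEPTIONS
        try {
            oneapi::tbb::parallel_for(std::size_t(0), v.size(),
                [&](std::size_t i) { v[i] *= factor; });
        } catch (const std::exception&) {
            std::abort();   // illustrative policy only; real code would report or recover
        }
    #else
        oneapi::tbb::parallel_for(std::size_t(0), v.size(),
            [&](std::size_t i) { v[i] *= factor; });
    #endif
    }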
+#if __INTEL_COMPILER && (!_MSC_VER || __INTEL_CXX11_MOVE__) + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__INTEL_COMPILER > 2021 && __TBB_LANG >= 201703L) + #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition +#elif __clang__ + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__has_feature(cxx_variable_templates)) + #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition + #ifdef __cpp_deduction_guides + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201611L) + #else + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT 0 + #endif +#elif __GNUC__ + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L && __TBB_GCC_VERSION >= 50000) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201606L) + #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 201709L && __TBB_GCC_VERSION >= 100201) +#elif _MSC_VER + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (_MSC_FULL_VER >= 190023918 && (!__INTEL_COMPILER || __INTEL_COMPILER >= 1700)) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (_MSC_VER >= 1914 && __TBB_LANG >= 201703L && (!__INTEL_COMPILER || __INTEL_COMPILER > 2021)) + #define __TBB_CPP20_CONCEPTS_PRESENT (_MSC_VER >= 1923 && __TBB_LANG >= 202002L) // TODO: INTEL_COMPILER? +#else + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__TBB_LANG >= 201703L) + #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 202002L) +#endif + +// GCC4.8 on RHEL7 does not support std::get_new_handler +#define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION) +// GCC4.8 on RHEL7 does not support std::is_trivially_copyable +#define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700 || (__TBB_GLIBCXX_VERSION >= 50000 && __GXX_EXPERIMENTAL_CXX0X__)) + +#define __TBB_CPP17_MEMORY_RESOURCE_PRESENT 0 +#define __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT (_MSC_VER >= 1911) +#define __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_IS_SWAPPABLE_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP20_COMPARISONS_PRESENT __TBB_CPP20_PRESENT + +#if (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__APPLE__ && !defined(_musl_)) +#define __TBB_RESUMABLE_TASKS 1 +#else +#define __TBB_RESUMABLE_TASKS 0 +#endif + +/* This macro marks incomplete code or comments describing ideas which are considered for the future. + * See also for plain comment with TODO and FIXME marks for small improvement opportunities. + */ +#define __TBB_TODO 0 + +/* Check which standard library we use. */ +/* __TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed. 
*/ +#if !defined(__TBB_SYMBOL) && !__TBB_CONFIG_PREPROC_ONLY + #include <cstddef> +#endif + +/** Target OS is either iOS* or iOS* simulator **/ +#if __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ + #define __TBB_IOS 1 +#endif + +#if __APPLE__ + #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000 + // ICC does not correctly set the macro if -mmacosx-min-version is not specified + #define __TBB_MACOS_TARGET_VERSION (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000)) + #else + #define __TBB_MACOS_TARGET_VERSION __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ + #endif +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) + #define __TBB_GCC_WARNING_IGNORED_ATTRIBUTES_PRESENT (__TBB_GCC_VERSION >= 60100) +#endif + +#define __TBB_CPP17_FALLTHROUGH_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_NODISCARD_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_FALLTHROUGH_PRESENT (__TBB_GCC_VERSION >= 70000 && !__INTEL_COMPILER) + +#if __TBB_CPP17_FALLTHROUGH_PRESENT + #define __TBB_fallthrough [[fallthrough]] +#elif __TBB_FALLTHROUGH_PRESENT + #define __TBB_fallthrough __attribute__ ((fallthrough)) +#else + #define __TBB_fallthrough +#endif + +#if __TBB_CPP17_NODISCARD_PRESENT + #define __TBB_nodiscard [[nodiscard]] +#elif __clang__ || __GNUC__ + #define __TBB_nodiscard __attribute__((warn_unused_result)) +#else + #define __TBB_nodiscard +#endif + +#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \ + || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200) && !__TBB_IOS) + + +#define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || (_MSC_VER>=1700 && !__clang__) || __INTEL_COMPILER>=1300) && !__TBB_DEFINE_MIC && !__ANDROID__) + +#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) && !__ANDROID__) + +/** Internal TBB features & modes **/ + +/** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when + it's necessary to test internal functions not exported from TBB DLLs +**/ +#if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY) + #define __TBB_NO_IMPLICIT_LINKAGE 1 + #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 +#endif + +#if (__TBB_BUILD || __TBBMALLOC_BUILD || __TBBMALLOCPROXY_BUILD || __TBBBIND_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE) + #define __TBB_NO_IMPLICIT_LINKAGE 1 +#endif + +#if _MSC_VER + #if !__TBB_NO_IMPLICIT_LINKAGE + #ifdef _DEBUG + #pragma comment(lib, "tbb12_debug.lib") + #else + #pragma comment(lib, "tbb12.lib") + #endif + #endif +#endif + +#ifndef __TBB_SCHEDULER_OBSERVER + #define __TBB_SCHEDULER_OBSERVER 1 +#endif /* __TBB_SCHEDULER_OBSERVER */ + +#ifndef __TBB_FP_CONTEXT + #define __TBB_FP_CONTEXT 1 +#endif /* __TBB_FP_CONTEXT */ + +#define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official + +#ifndef __TBB_ARENA_OBSERVER + #define __TBB_ARENA_OBSERVER __TBB_SCHEDULER_OBSERVER +#endif /* __TBB_ARENA_OBSERVER */ + +#ifndef __TBB_ARENA_BINDING + #define __TBB_ARENA_BINDING 1 +#endif + +#if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD + #define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1 +#endif + +#if (TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION || __TBB_BUILD) && __TBB_ARENA_BINDING + #define __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT 1 +#endif + +#ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY + #define 
__TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 +#endif + +#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \ + (_WIN32 || _WIN64 || __APPLE__ || (__linux__ && !__ANDROID__)) + #define __TBB_SURVIVE_THREAD_SWITCH 1 +#endif /* __TBB_SURVIVE_THREAD_SWITCH */ + +#ifndef TBB_PREVIEW_FLOW_GRAPH_FEATURES + #define TBB_PREVIEW_FLOW_GRAPH_FEATURES __TBB_CPF_BUILD +#endif + +#ifndef __TBB_DEFAULT_PARTITIONER + #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner +#endif + +#ifndef __TBB_FLOW_TRACE_CODEPTR + #define __TBB_FLOW_TRACE_CODEPTR __TBB_CPF_BUILD +#endif + +// Intel(R) C++ Compiler starts analyzing usages of the deprecated content at the template +// instantiation site, which is too late for suppression of the corresponding messages for internal +// stuff. +#if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) + #if (__TBB_LANG >= 201402L) + #define __TBB_DEPRECATED [[deprecated]] + #define __TBB_DEPRECATED_MSG(msg) [[deprecated(msg)]] + #elif _MSC_VER + #define __TBB_DEPRECATED __declspec(deprecated) + #define __TBB_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) + #elif (__GNUC__ && __TBB_GCC_VERSION >= 40805) || __clang__ + #define __TBB_DEPRECATED __attribute__((deprecated)) + #define __TBB_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) + #endif +#endif // !defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + +#if !defined(__TBB_DEPRECATED) + #define __TBB_DEPRECATED + #define __TBB_DEPRECATED_MSG(msg) +#elif !defined(__TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES) + // Suppress deprecated messages from self + #define __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES 1 +#endif + +#if defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) && (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + #define __TBB_DEPRECATED_VERBOSE __TBB_DEPRECATED + #define __TBB_DEPRECATED_VERBOSE_MSG(msg) __TBB_DEPRECATED_MSG(msg) +#else + #define __TBB_DEPRECATED_VERBOSE + #define __TBB_DEPRECATED_VERBOSE_MSG(msg) +#endif // (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + +#if (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) && !(__TBB_LANG >= 201103L || _MSC_VER >= 1900) + #pragma message("TBB Warning: Support for C++98/03 is deprecated. Please use the compiler that supports C++11 features at least.") +#endif + +#ifdef _VARIADIC_MAX + #define __TBB_VARIADIC_MAX _VARIADIC_MAX +#else + #if _MSC_VER == 1700 + #define __TBB_VARIADIC_MAX 5 // VS11 setting, issue resolved in VS12 + #elif _MSC_VER == 1600 + #define __TBB_VARIADIC_MAX 10 // VS10 setting + #else + #define __TBB_VARIADIC_MAX 15 + #endif +#endif + +/** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by + the bugs in compilers, standard or OS specific libraries. They should be + removed as soon as the corresponding bugs are fixed or the buggy OS/compiler + versions go out of the support list. +**/ + +// Some STL containers not support allocator traits in old GCC versions +#if __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION <= 50301 + #define TBB_ALLOCATOR_TRAITS_BROKEN 1 +#endif + +// GCC 4.8 C++ standard library implements std::this_thread::yield as no-op. 
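The __TBB_DEPRECATED* family above resolves to whichever deprecation spelling the compiler accepts, so TBB headers can flag soon-to-be-removed entry points without per-compiler clutter. A compact sketch of the same dispatch with stand-in names (MY_DEPRECATED_MSG, old_api), invented for the example:

    #include <cstdio>

    // Pick the deprecation attribute spelling the toolchain understands.
    #if __cplusplus >= 201402L
        #define MY_DEPRECATED_MSG(msg) [[deprecated(msg)]]
    #elif defined(_MSC_VER)
        #define MY_DEPRECATED_MSG(msg) __declspec(deprecated(msg))
    #elif defined(__GNUC__) || defined(__clang__)
        #define MY_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
    #else
        #define MY_DEPRECATED_MSG(msg)
    #endif

    MY_DEPRECATED_MSG("use new_api() instead")
    void old_api() { std::puts("old_api"); }

    void new_api() { std::puts("new_api"); }

    int main() {
        old_api();   // typically emits a -Wdeprecated-declarations style warning
        new_api();
    }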
+#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 + #define __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN 1 +#endif + +/** End of __TBB_XXX_BROKEN macro section **/ + +#if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER) + // A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702) + #define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1 +#endif + +// Many OS versions (Android 4.0.[0-3] for example) need workaround for dlopen to avoid non-recursive loader lock hang +// Setting the workaround for all compile targets ($APP_PLATFORM) below Android 4.4 (android-19) +#if __ANDROID__ + #include <android/api-level.h> +#endif + +#define __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING (TBB_PREVIEW_FLOW_GRAPH_FEATURES) + +#ifndef __TBB_PREVIEW_CRITICAL_TASKS +#define __TBB_PREVIEW_CRITICAL_TASKS 1 +#endif + +#ifndef __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +#define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES) +#endif + + +#if !defined(__APPLE__) || !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED > 101500 + #define __TBB_ALIGNAS_AVAILABLE 1 +#else + #define __TBB_ALIGNAS_AVAILABLE 0 +#endif + +#endif // __TBB_detail__config_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h index 4dca07fa10..89ac137451 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h @@ -1,67 +1,67 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_detail__containers_helpers_H -#define __TBB_detail__containers_helpers_H - -#include "_template_helpers.h" -#include "_allocator_traits.h" -#include <type_traits> -#include <memory> -#include <functional> - -namespace tbb { -namespace detail { -inline namespace d0 { - -template <typename Compare, typename = void> -struct comp_is_transparent : std::false_type {}; - -template <typename Compare> -struct comp_is_transparent<Compare, tbb::detail::void_t<typename Compare::is_transparent>> : std::true_type {}; - -template <typename Key, typename Hasher, typename KeyEqual, typename = void > -struct has_transparent_key_equal : std::false_type { using type = KeyEqual; }; - -template <typename Key, typename Hasher, typename KeyEqual> -struct has_transparent_key_equal<Key, Hasher, KeyEqual, tbb::detail::void_t<typename Hasher::transparent_key_equal>> : std::true_type { - using type = typename Hasher::transparent_key_equal; - static_assert(comp_is_transparent<type>::value, "Hash::transparent_key_equal::is_transparent is not valid or does not denote a type."); - static_assert((std::is_same<KeyEqual, std::equal_to<Key>>::value || - std::is_same<typename Hasher::transparent_key_equal, KeyEqual>::value), "KeyEqual is a different type than equal_to<Key> or Hash::transparent_key_equal."); - }; - -struct is_iterator_impl { -template <typename T> -using iter_traits_category = typename std::iterator_traits<T>::iterator_category; - -template <typename T> -using input_iter_category = typename std::enable_if<std::is_base_of<std::input_iterator_tag, iter_traits_category<T>>::value>::type; -}; // struct is_iterator_impl - -template <typename T> -using is_input_iterator = supports<T, is_iterator_impl::iter_traits_category, is_iterator_impl::input_iter_category>; - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename T> -inline constexpr bool is_input_iterator_v = is_input_iterator<T>::value; -#endif - -} // inline namespace d0 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__containers_helpers_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__containers_helpers_H +#define __TBB_detail__containers_helpers_H + +#include "_template_helpers.h" +#include "_allocator_traits.h" +#include <type_traits> +#include <memory> +#include <functional> + +namespace tbb { +namespace detail { +inline namespace d0 { + +template <typename Compare, typename = void> +struct comp_is_transparent : std::false_type {}; + +template <typename Compare> +struct comp_is_transparent<Compare, tbb::detail::void_t<typename Compare::is_transparent>> : std::true_type {}; + +template <typename Key, typename Hasher, typename KeyEqual, typename = void > +struct has_transparent_key_equal : std::false_type { using type = KeyEqual; }; + +template <typename Key, typename Hasher, typename KeyEqual> +struct has_transparent_key_equal<Key, Hasher, KeyEqual, tbb::detail::void_t<typename Hasher::transparent_key_equal>> : std::true_type { + using type = typename Hasher::transparent_key_equal; + static_assert(comp_is_transparent<type>::value, "Hash::transparent_key_equal::is_transparent is not valid or does not denote a type."); + static_assert((std::is_same<KeyEqual, std::equal_to<Key>>::value || + std::is_same<typename Hasher::transparent_key_equal, KeyEqual>::value), "KeyEqual is a different type than equal_to<Key> or Hash::transparent_key_equal."); + }; + +struct is_iterator_impl { +template <typename T> +using iter_traits_category = typename std::iterator_traits<T>::iterator_category; + +template <typename T> +using input_iter_category = typename std::enable_if<std::is_base_of<std::input_iterator_tag, iter_traits_category<T>>::value>::type; +}; // struct is_iterator_impl + +template <typename T> +using is_input_iterator = supports<T, is_iterator_impl::iter_traits_category, is_iterator_impl::input_iter_category>; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename T> +inline constexpr bool is_input_iterator_v = is_input_iterator<T>::value; +#endif + +} // inline namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__containers_helpers_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h index 9764209fa8..05596ce797 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h @@ -1,88 +1,88 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
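comp_is_transparent in the header above is the usual is_transparent detection idiom; it is what gates heterogeneous-lookup overloads in the concurrent ordered containers. A self-contained restatement, with locally renamed helpers (trailing underscore) so they do not collide with or claim to be TBB's own:

    #include <functional>
    #include <string>
    #include <type_traits>

    // Local re-statement of the detection idiom shown above.
    template <typename...> using void_t_ = void;

    template <typename Compare, typename = void>
    struct comp_is_transparent_ : std::false_type {};

    template <typename Compare>
    struct comp_is_transparent_<Compare, void_t_<typename Compare::is_transparent>>
        : std::true_type {};

    // A comparator opts in to heterogeneous lookup by exposing is_transparent.
    struct transparent_less {
        using is_transparent = void;
        template <typename A, typename B>
        bool operator()(const A& a, const B& b) const { return a < b; }
    };

    static_assert(comp_is_transparent_<transparent_less>::value,
                  "opt-in comparator is detected");
    static_assert(!comp_is_transparent_<std::less<std::string>>::value,
                  "plain comparator is not");

    int main() { return 0; }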
-*/ - -#ifndef __TBB__exception_H -#define __TBB__exception_H - -#include "_config.h" - -#include <new> // std::bad_alloc -#include <exception> // std::exception -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -#include <stdexcept> // std::runtime_error -#endif - -namespace tbb { -namespace detail { -inline namespace d0 { -enum class exception_id { - bad_alloc = 1, - bad_last_alloc, - user_abort, - nonpositive_step, - out_of_range, - reservation_length_error, - missing_wait, - invalid_load_factor, - invalid_key, - bad_tagged_msg_cast, - unsafe_wait, - last_entry -}; -} // namespace d0 - -namespace r1 { -//! Exception for concurrent containers -class bad_last_alloc : public std::bad_alloc { -public: - const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; -}; - -//! Exception for user-initiated abort -class user_abort : public std::exception { -public: - const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; -}; - -//! Exception for missing wait on structured_task_group -class missing_wait : public std::exception { -public: - const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; -}; - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -//! Exception for impossible finalization of task_sheduler_handle -class unsafe_wait : public std::runtime_error { -public: - unsafe_wait(const char* msg) : std::runtime_error(msg) {} -}; -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -//! Gathers all throw operators in one place. -/** Its purpose is to minimize code bloat that can be caused by throw operators - scattered in multiple places, especially in templates. **/ -void __TBB_EXPORTED_FUNC throw_exception ( exception_id ); -} // namespace r1 - -inline namespace d0 { -using r1::throw_exception; -} // namespace d0 - -} // namespace detail -} // namespace tbb - -#endif // __TBB__exception_H - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__exception_H +#define __TBB__exception_H + +#include "_config.h" + +#include <new> // std::bad_alloc +#include <exception> // std::exception +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +#include <stdexcept> // std::runtime_error +#endif + +namespace tbb { +namespace detail { +inline namespace d0 { +enum class exception_id { + bad_alloc = 1, + bad_last_alloc, + user_abort, + nonpositive_step, + out_of_range, + reservation_length_error, + missing_wait, + invalid_load_factor, + invalid_key, + bad_tagged_msg_cast, + unsafe_wait, + last_entry +}; +} // namespace d0 + +namespace r1 { +//! Exception for concurrent containers +class bad_last_alloc : public std::bad_alloc { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +//! Exception for user-initiated abort +class user_abort : public std::exception { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +//! 
Exception for missing wait on structured_task_group +class missing_wait : public std::exception { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +//! Exception for impossible finalization of task_sheduler_handle +class unsafe_wait : public std::runtime_error { +public: + unsafe_wait(const char* msg) : std::runtime_error(msg) {} +}; +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +//! Gathers all throw operators in one place. +/** Its purpose is to minimize code bloat that can be caused by throw operators + scattered in multiple places, especially in templates. **/ +void __TBB_EXPORTED_FUNC throw_exception ( exception_id ); +} // namespace r1 + +inline namespace d0 { +using r1::throw_exception; +} // namespace d0 + +} // namespace detail +} // namespace tbb + +#endif // __TBB__exception_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h index 34ba1efcaf..3810e0392a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h @@ -1,371 +1,371 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_body_impl_H -#define __TBB__flow_graph_body_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::detail::d1 (in flow_graph.h) - -typedef std::uint64_t tag_value; - - -// TODO revamp: find out if there is already helper for has_policy. -template<typename ... Policies> struct Policy {}; - -template<typename ... Policies> struct has_policy; - -template<typename ExpectedPolicy, typename FirstPolicy, typename ...Policies> -struct has_policy<ExpectedPolicy, FirstPolicy, Policies...> : - std::integral_constant<bool, has_policy<ExpectedPolicy, FirstPolicy>::value || - has_policy<ExpectedPolicy, Policies...>::value> {}; - -template<typename ExpectedPolicy, typename SinglePolicy> -struct has_policy<ExpectedPolicy, SinglePolicy> : - std::integral_constant<bool, std::is_same<ExpectedPolicy, SinglePolicy>::value> {}; - -template<typename ExpectedPolicy, typename ...Policies> -struct has_policy<ExpectedPolicy, Policy<Policies...> > : has_policy<ExpectedPolicy, Policies...> {}; - -namespace graph_policy_namespace { - - struct rejecting { }; - struct reserving { }; - struct queueing { }; - struct lightweight { }; - - // K == type of field used for key-matching. Each tag-matching port will be provided - // functor that, given an object accepted by the port, will return the - /// field of type K being used for matching. 
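The key_matching policy described in the comment above is what the public flow-graph join_node accepts for key-based joining: each port is given a functor extracting the key of type K. A hedged usage sketch assuming the documented oneTBB join_node/key_matching API; the order/payment record types and key extractors are invented for the example:

    #include <oneapi/tbb/flow_graph.h>
    #include <iostream>
    #include <string>
    #include <tuple>

    struct order   { int id; std::string item; };
    struct payment { int id; double amount; };

    int main() {
        using namespace oneapi::tbb::flow;
        graph g;

        // One key-extraction functor per input port, as described above.
        join_node<std::tuple<order, payment>, key_matching<int>> join(
            g,
            [](const order& o)   { return o.id; },
            [](const payment& p) { return p.id; });

        function_node<std::tuple<order, payment>> sink(g, unlimited,
            [](const std::tuple<order, payment>& t) {
                std::cout << std::get<0>(t).item << " paid: "
                          << std::get<1>(t).amount << "\n";
            });

        make_edge(join, sink);
        input_port<0>(join).try_put(order{7, "book"});
        input_port<1>(join).try_put(payment{7, 12.5});
        g.wait_for_all();
    }

The joined tuple is emitted only once both ports have received a message with the same key, which is the behavior the key_matching machinery below implements.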
- template<typename K, typename KHash=tbb_hash_compare<typename std::decay<K>::type > > - struct key_matching { - typedef K key_type; - typedef typename std::decay<K>::type base_key_type; - typedef KHash hash_compare_type; - }; - - // old tag_matching join's new specifier - typedef key_matching<tag_value> tag_matching; - - // Aliases for Policy combinations - typedef Policy<queueing, lightweight> queueing_lightweight; - typedef Policy<rejecting, lightweight> rejecting_lightweight; - -} // namespace graph_policy_namespace - -// -------------- function_body containers ---------------------- - -//! A functor that takes no input and generates a value of type Output -template< typename Output > -class input_body : no_assign { -public: - virtual ~input_body() {} - virtual Output operator()(flow_control& fc) = 0; - virtual input_body* clone() = 0; -}; - -//! The leaf for input_body -template< typename Output, typename Body> -class input_body_leaf : public input_body<Output> { -public: - input_body_leaf( const Body &_body ) : body(_body) { } - Output operator()(flow_control& fc) override { return body(fc); } - input_body_leaf* clone() override { - return new input_body_leaf< Output, Body >(body); - } - Body get_body() { return body; } -private: - Body body; -}; - -//! A functor that takes an Input and generates an Output -template< typename Input, typename Output > -class function_body : no_assign { -public: - virtual ~function_body() {} - virtual Output operator()(const Input &input) = 0; - virtual function_body* clone() = 0; -}; - -//! the leaf for function_body -template <typename Input, typename Output, typename B> -class function_body_leaf : public function_body< Input, Output > { -public: - function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const Input &i) override { return body(i); } - B get_body() { return body; } - function_body_leaf* clone() override { - return new function_body_leaf< Input, Output, B >(body); - } -private: - B body; -}; - -//! the leaf for function_body specialized for Input and output of continue_msg -template <typename B> -class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > { -public: - function_body_leaf( const B &_body ) : body(_body) { } - continue_msg operator()( const continue_msg &i ) override { - body(i); - return i; - } - B get_body() { return body; } - function_body_leaf* clone() override { - return new function_body_leaf< continue_msg, continue_msg, B >(body); - } -private: - B body; -}; - -//! the leaf for function_body specialized for Output of continue_msg -template <typename Input, typename B> -class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > { -public: - function_body_leaf( const B &_body ) : body(_body) { } - continue_msg operator()(const Input &i) override { - body(i); - return continue_msg(); - } - B get_body() { return body; } - function_body_leaf* clone() override { - return new function_body_leaf< Input, continue_msg, B >(body); - } -private: - B body; -}; - -//! 
the leaf for function_body specialized for Input of continue_msg -template <typename Output, typename B> -class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > { -public: - function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const continue_msg &i) override { - return body(i); - } - B get_body() { return body; } - function_body_leaf* clone() override { - return new function_body_leaf< continue_msg, Output, B >(body); - } -private: - B body; -}; - -//! function_body that takes an Input and a set of output ports -template<typename Input, typename OutputSet> -class multifunction_body : no_assign { -public: - virtual ~multifunction_body () {} - virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0; - virtual multifunction_body* clone() = 0; - virtual void* get_body_ptr() = 0; -}; - -//! leaf for multifunction. OutputSet can be a std::tuple or a vector. -template<typename Input, typename OutputSet, typename B > -class multifunction_body_leaf : public multifunction_body<Input, OutputSet> { -public: - multifunction_body_leaf(const B &_body) : body(_body) { } - void operator()(const Input &input, OutputSet &oset) override { - body(input, oset); // body may explicitly put() to one or more of oset. - } - void* get_body_ptr() override { return &body; } - multifunction_body_leaf* clone() override { - return new multifunction_body_leaf<Input, OutputSet,B>(body); - } - -private: - B body; -}; - -// ------ function bodies for hash_buffers and key-matching joins. - -template<typename Input, typename Output> -class type_to_key_function_body : no_assign { - public: - virtual ~type_to_key_function_body() {} - virtual Output operator()(const Input &input) = 0; // returns an Output - virtual type_to_key_function_body* clone() = 0; -}; - -// specialization for ref output -template<typename Input, typename Output> -class type_to_key_function_body<Input,Output&> : no_assign { - public: - virtual ~type_to_key_function_body() {} - virtual const Output & operator()(const Input &input) = 0; // returns a const Output& - virtual type_to_key_function_body* clone() = 0; -}; - -template <typename Input, typename Output, typename B> -class type_to_key_function_body_leaf : public type_to_key_function_body<Input, Output> { -public: - type_to_key_function_body_leaf( const B &_body ) : body(_body) { } - Output operator()(const Input &i) override { return body(i); } - type_to_key_function_body_leaf* clone() override { - return new type_to_key_function_body_leaf< Input, Output, B>(body); - } -private: - B body; -}; - -template <typename Input, typename Output, typename B> -class type_to_key_function_body_leaf<Input,Output&,B> : public type_to_key_function_body< Input, Output&> { -public: - type_to_key_function_body_leaf( const B &_body ) : body(_body) { } - const Output& operator()(const Input &i) override { - return body(i); - } - type_to_key_function_body_leaf* clone() override { - return new type_to_key_function_body_leaf< Input, Output&, B>(body); - } -private: - B body; -}; - -// --------------------------- end of function_body containers ------------------------ - -// --------------------------- node task bodies --------------------------------------- - -//! 
A task that calls a node's forward_task function -template< typename NodeType > -class forward_task_bypass : public graph_task { - NodeType &my_node; -public: - forward_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n - , node_priority_t node_priority = no_priority - ) : graph_task(g, allocator, node_priority), - my_node(n) {} - - task* execute(execution_data& ed) override { - graph_task* next_task = my_node.forward_task(); - if (SUCCESSFULLY_ENQUEUED == next_task) - next_task = nullptr; - else if (next_task) - next_task = prioritize_task(my_node.graph_reference(), *next_task); - finalize(ed); - return next_task; - } -}; - -//! A task that calls a node's apply_body_bypass function, passing in an input of type Input -// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL -template< typename NodeType, typename Input > -class apply_body_task_bypass : public graph_task { - NodeType &my_node; - Input my_input; -public: - - apply_body_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n, const Input &i - , node_priority_t node_priority = no_priority - ) : graph_task(g, allocator, node_priority), - my_node(n), my_input(i) {} - - task* execute(execution_data& ed) override { - graph_task* next_task = my_node.apply_body_bypass( my_input ); - if (SUCCESSFULLY_ENQUEUED == next_task) - next_task = nullptr; - else if (next_task) - next_task = prioritize_task(my_node.graph_reference(), *next_task); - finalize(ed); - return next_task; - - } -}; - -//! A task that calls a node's apply_body_bypass function with no input -template< typename NodeType > -class input_node_task_bypass : public graph_task { - NodeType &my_node; -public: - input_node_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n ) - : graph_task(g, allocator), my_node(n) {} - - task* execute(execution_data& ed) override { - graph_task* next_task = my_node.apply_body_bypass( ); - if (SUCCESSFULLY_ENQUEUED == next_task) - next_task = nullptr; - else if (next_task) - next_task = prioritize_task(my_node.graph_reference(), *next_task); - finalize(ed); - return next_task; - } - -}; - -// ------------------------ end of node task bodies ----------------------------------- - -template<typename T, typename DecrementType, typename DummyType = void> -class threshold_regulator; - -template<typename T, typename DecrementType> -class threshold_regulator<T, DecrementType, - typename std::enable_if<std::is_integral<DecrementType>::value>::type> - : public receiver<DecrementType>, no_copy -{ - T* my_node; -protected: - - graph_task* try_put_task( const DecrementType& value ) override { - graph_task* result = my_node->decrement_counter( value ); - if( !result ) - result = SUCCESSFULLY_ENQUEUED; - return result; - } - - graph& graph_reference() const override { - return my_node->my_graph; - } - - template<typename U, typename V> friend class limiter_node; - void reset_receiver( reset_flags ) {} - -public: - threshold_regulator(T* owner) : my_node(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } -}; - -template<typename T> -class threshold_regulator<T, continue_msg, void> : public continue_receiver, no_copy { - - T *my_node; - - graph_task* execute() override { - return my_node->decrement_counter( 1 ); - } - -protected: - - graph& graph_reference() const override { - return my_node->my_graph; - } - -public: - - typedef continue_msg input_type; - typedef continue_msg output_type; - threshold_regulator(T* owner) - : 
continue_receiver( /*number_of_predecessors=*/0, no_priority ), my_node(owner) - { - // Do not work with the passed pointer here as it may not be fully initialized yet - } -}; - -#endif // __TBB__flow_graph_body_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_body_impl_H +#define __TBB__flow_graph_body_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 (in flow_graph.h) + +typedef std::uint64_t tag_value; + + +// TODO revamp: find out if there is already helper for has_policy. +template<typename ... Policies> struct Policy {}; + +template<typename ... Policies> struct has_policy; + +template<typename ExpectedPolicy, typename FirstPolicy, typename ...Policies> +struct has_policy<ExpectedPolicy, FirstPolicy, Policies...> : + std::integral_constant<bool, has_policy<ExpectedPolicy, FirstPolicy>::value || + has_policy<ExpectedPolicy, Policies...>::value> {}; + +template<typename ExpectedPolicy, typename SinglePolicy> +struct has_policy<ExpectedPolicy, SinglePolicy> : + std::integral_constant<bool, std::is_same<ExpectedPolicy, SinglePolicy>::value> {}; + +template<typename ExpectedPolicy, typename ...Policies> +struct has_policy<ExpectedPolicy, Policy<Policies...> > : has_policy<ExpectedPolicy, Policies...> {}; + +namespace graph_policy_namespace { + + struct rejecting { }; + struct reserving { }; + struct queueing { }; + struct lightweight { }; + + // K == type of field used for key-matching. Each tag-matching port will be provided + // functor that, given an object accepted by the port, will return the + /// field of type K being used for matching. + template<typename K, typename KHash=tbb_hash_compare<typename std::decay<K>::type > > + struct key_matching { + typedef K key_type; + typedef typename std::decay<K>::type base_key_type; + typedef KHash hash_compare_type; + }; + + // old tag_matching join's new specifier + typedef key_matching<tag_value> tag_matching; + + // Aliases for Policy combinations + typedef Policy<queueing, lightweight> queueing_lightweight; + typedef Policy<rejecting, lightweight> rejecting_lightweight; + +} // namespace graph_policy_namespace + +// -------------- function_body containers ---------------------- + +//! A functor that takes no input and generates a value of type Output +template< typename Output > +class input_body : no_assign { +public: + virtual ~input_body() {} + virtual Output operator()(flow_control& fc) = 0; + virtual input_body* clone() = 0; +}; + +//! 
The leaf for input_body +template< typename Output, typename Body> +class input_body_leaf : public input_body<Output> { +public: + input_body_leaf( const Body &_body ) : body(_body) { } + Output operator()(flow_control& fc) override { return body(fc); } + input_body_leaf* clone() override { + return new input_body_leaf< Output, Body >(body); + } + Body get_body() { return body; } +private: + Body body; +}; + +//! A functor that takes an Input and generates an Output +template< typename Input, typename Output > +class function_body : no_assign { +public: + virtual ~function_body() {} + virtual Output operator()(const Input &input) = 0; + virtual function_body* clone() = 0; +}; + +//! the leaf for function_body +template <typename Input, typename Output, typename B> +class function_body_leaf : public function_body< Input, Output > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const Input &i) override { return body(i); } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< Input, Output, B >(body); + } +private: + B body; +}; + +//! the leaf for function_body specialized for Input and output of continue_msg +template <typename B> +class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + continue_msg operator()( const continue_msg &i ) override { + body(i); + return i; + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< continue_msg, continue_msg, B >(body); + } +private: + B body; +}; + +//! the leaf for function_body specialized for Output of continue_msg +template <typename Input, typename B> +class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + continue_msg operator()(const Input &i) override { + body(i); + return continue_msg(); + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< Input, continue_msg, B >(body); + } +private: + B body; +}; + +//! the leaf for function_body specialized for Input of continue_msg +template <typename Output, typename B> +class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const continue_msg &i) override { + return body(i); + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< continue_msg, Output, B >(body); + } +private: + B body; +}; + +//! function_body that takes an Input and a set of output ports +template<typename Input, typename OutputSet> +class multifunction_body : no_assign { +public: + virtual ~multifunction_body () {} + virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0; + virtual multifunction_body* clone() = 0; + virtual void* get_body_ptr() = 0; +}; + +//! leaf for multifunction. OutputSet can be a std::tuple or a vector. +template<typename Input, typename OutputSet, typename B > +class multifunction_body_leaf : public multifunction_body<Input, OutputSet> { +public: + multifunction_body_leaf(const B &_body) : body(_body) { } + void operator()(const Input &input, OutputSet &oset) override { + body(input, oset); // body may explicitly put() to one or more of oset. 
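multifunction_body above is the type-erased holder behind the public multifunction_node, whose body receives the input together with a tuple of output ports and may put to any subset of them. A usage sketch assuming the documented oneTBB multifunction_node API; the even/odd splitter is invented for the example:

    #include <oneapi/tbb/flow_graph.h>
    #include <iostream>
    #include <tuple>

    int main() {
        using namespace oneapi::tbb::flow;
        graph g;

        // Output port 0 carries even numbers, port 1 carries odd numbers.
        using splitter_t = multifunction_node<int, std::tuple<int, int>>;
        splitter_t splitter(g, unlimited,
            [](const int& v, splitter_t::output_ports_type& ports) {
                if (v % 2 == 0)
                    std::get<0>(ports).try_put(v);   // body explicitly puts to one port
                else
                    std::get<1>(ports).try_put(v);
            });

        function_node<int> evens(g, serial, [](int v) { std::cout << "even " << v << "\n"; });
        function_node<int> odds (g, serial, [](int v) { std::cout << "odd "  << v << "\n"; });

        make_edge(output_port<0>(splitter), evens);
        make_edge(output_port<1>(splitter), odds);

        for (int i = 0; i < 4; ++i)
            splitter.try_put(i);
        g.wait_for_all();
    }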
+ } + void* get_body_ptr() override { return &body; } + multifunction_body_leaf* clone() override { + return new multifunction_body_leaf<Input, OutputSet,B>(body); + } + +private: + B body; +}; + +// ------ function bodies for hash_buffers and key-matching joins. + +template<typename Input, typename Output> +class type_to_key_function_body : no_assign { + public: + virtual ~type_to_key_function_body() {} + virtual Output operator()(const Input &input) = 0; // returns an Output + virtual type_to_key_function_body* clone() = 0; +}; + +// specialization for ref output +template<typename Input, typename Output> +class type_to_key_function_body<Input,Output&> : no_assign { + public: + virtual ~type_to_key_function_body() {} + virtual const Output & operator()(const Input &input) = 0; // returns a const Output& + virtual type_to_key_function_body* clone() = 0; +}; + +template <typename Input, typename Output, typename B> +class type_to_key_function_body_leaf : public type_to_key_function_body<Input, Output> { +public: + type_to_key_function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const Input &i) override { return body(i); } + type_to_key_function_body_leaf* clone() override { + return new type_to_key_function_body_leaf< Input, Output, B>(body); + } +private: + B body; +}; + +template <typename Input, typename Output, typename B> +class type_to_key_function_body_leaf<Input,Output&,B> : public type_to_key_function_body< Input, Output&> { +public: + type_to_key_function_body_leaf( const B &_body ) : body(_body) { } + const Output& operator()(const Input &i) override { + return body(i); + } + type_to_key_function_body_leaf* clone() override { + return new type_to_key_function_body_leaf< Input, Output&, B>(body); + } +private: + B body; +}; + +// --------------------------- end of function_body containers ------------------------ + +// --------------------------- node task bodies --------------------------------------- + +//! A task that calls a node's forward_task function +template< typename NodeType > +class forward_task_bypass : public graph_task { + NodeType &my_node; +public: + forward_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n + , node_priority_t node_priority = no_priority + ) : graph_task(g, allocator, node_priority), + my_node(n) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.forward_task(); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + } +}; + +//! A task that calls a node's apply_body_bypass function, passing in an input of type Input +// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL +template< typename NodeType, typename Input > +class apply_body_task_bypass : public graph_task { + NodeType &my_node; + Input my_input; +public: + + apply_body_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n, const Input &i + , node_priority_t node_priority = no_priority + ) : graph_task(g, allocator, node_priority), + my_node(n), my_input(i) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.apply_body_bypass( my_input ); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + + } +}; + +//! 
A task that calls a node's apply_body_bypass function with no input +template< typename NodeType > +class input_node_task_bypass : public graph_task { + NodeType &my_node; +public: + input_node_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n ) + : graph_task(g, allocator), my_node(n) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.apply_body_bypass( ); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + } + +}; + +// ------------------------ end of node task bodies ----------------------------------- + +template<typename T, typename DecrementType, typename DummyType = void> +class threshold_regulator; + +template<typename T, typename DecrementType> +class threshold_regulator<T, DecrementType, + typename std::enable_if<std::is_integral<DecrementType>::value>::type> + : public receiver<DecrementType>, no_copy +{ + T* my_node; +protected: + + graph_task* try_put_task( const DecrementType& value ) override { + graph_task* result = my_node->decrement_counter( value ); + if( !result ) + result = SUCCESSFULLY_ENQUEUED; + return result; + } + + graph& graph_reference() const override { + return my_node->my_graph; + } + + template<typename U, typename V> friend class limiter_node; + void reset_receiver( reset_flags ) {} + +public: + threshold_regulator(T* owner) : my_node(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } +}; + +template<typename T> +class threshold_regulator<T, continue_msg, void> : public continue_receiver, no_copy { + + T *my_node; + + graph_task* execute() override { + return my_node->decrement_counter( 1 ); + } + +protected: + + graph& graph_reference() const override { + return my_node->my_graph; + } + +public: + + typedef continue_msg input_type; + typedef continue_msg output_type; + threshold_regulator(T* owner) + : continue_receiver( /*number_of_predecessors=*/0, no_priority ), my_node(owner) + { + // Do not work with the passed pointer here as it may not be fully initialized yet + } +}; + +#endif // __TBB__flow_graph_body_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h index ac5564598b..22d31cdcbb 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h @@ -1,435 +1,435 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_cache_impl_H -#define __TBB__flow_graph_cache_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::detail::d1 (in flow_graph.h) - -//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock. 
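threshold_regulator above backs the decrement port of the public limiter_node: each message arriving at the decrementer lowers the node's internal counter so another item may pass. A hedged sketch assuming the documented oneTBB limiter_node::decrementer() API; the pipeline shown is invented for the example:

    #include <oneapi/tbb/flow_graph.h>
    #include <iostream>

    int main() {
        using namespace oneapi::tbb::flow;
        graph g;

        buffer_node<int> buffered_input(g);
        limiter_node<int> limit(g, 2);      // at most 2 items past this point at a time

        function_node<int, continue_msg> work(g, serial, [](int v) {
            std::cout << "processing " << v << "\n";
            return continue_msg();
        });

        make_edge(buffered_input, limit);
        make_edge(limit, work);
        // Each completion message flowing into the decrementer lowers the internal
        // counter (the threshold_regulator shown above), letting another item through.
        make_edge(work, limit.decrementer());

        for (int i = 0; i < 10; ++i)
            buffered_input.try_put(i);
        g.wait_for_all();
    }

The buffer_node upstream matters: when the limiter is at its threshold it rejects new items, and the buffer retains them until a decrement lets the limiter pull again.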
-template< typename T, typename M=spin_mutex > -class node_cache { - public: - - typedef size_t size_type; - - bool empty() { - typename mutex_type::scoped_lock lock( my_mutex ); - return internal_empty(); - } - - void add( T &n ) { - typename mutex_type::scoped_lock lock( my_mutex ); - internal_push(n); - } - - void remove( T &n ) { - typename mutex_type::scoped_lock lock( my_mutex ); - for ( size_t i = internal_size(); i != 0; --i ) { - T &s = internal_pop(); - if ( &s == &n ) - break; // only remove one predecessor per request - internal_push(s); - } - } - - void clear() { - while( !my_q.empty()) (void)my_q.pop(); - } - -protected: - - typedef M mutex_type; - mutex_type my_mutex; - std::queue< T * > my_q; - - // Assumes lock is held - inline bool internal_empty( ) { - return my_q.empty(); - } - - // Assumes lock is held - inline size_type internal_size( ) { - return my_q.size(); - } - - // Assumes lock is held - inline void internal_push( T &n ) { - my_q.push(&n); - } - - // Assumes lock is held - inline T &internal_pop() { - T *v = my_q.front(); - my_q.pop(); - return *v; - } - -}; - -//! A cache of predecessors that only supports try_get -template< typename T, typename M=spin_mutex > -class predecessor_cache : public node_cache< sender<T>, M > { -public: - typedef M mutex_type; - typedef T output_type; - typedef sender<output_type> predecessor_type; - typedef receiver<output_type> successor_type; - - predecessor_cache( successor_type* owner ) : my_owner( owner ) { - __TBB_ASSERT( my_owner, "predecessor_cache should have an owner." ); - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - bool get_item( output_type& v ) { - - bool msg = false; - - do { - predecessor_type *src; - { - typename mutex_type::scoped_lock lock(this->my_mutex); - if ( this->internal_empty() ) { - break; - } - src = &this->internal_pop(); - } - - // Try to get from this sender - msg = src->try_get( v ); - - if (msg == false) { - // Relinquish ownership of the edge - register_successor(*src, *my_owner); - } else { - // Retain ownership of the edge - this->add(*src); - } - } while ( msg == false ); - return msg; - } - - // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). - void reset() { - for(;;) { - predecessor_type *src; - { - if (this->internal_empty()) break; - src = &this->internal_pop(); - } - register_successor(*src, *my_owner); - } - } - -protected: - successor_type* my_owner; -}; - -//! 
An cache of predecessors that supports requests and reservations -template< typename T, typename M=spin_mutex > -class reservable_predecessor_cache : public predecessor_cache< T, M > { -public: - typedef M mutex_type; - typedef T output_type; - typedef sender<T> predecessor_type; - typedef receiver<T> successor_type; - - reservable_predecessor_cache( successor_type* owner ) - : predecessor_cache<T,M>(owner), reserved_src(NULL) - { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - bool - try_reserve( output_type &v ) { - bool msg = false; - - do { - { - typename mutex_type::scoped_lock lock(this->my_mutex); - if ( reserved_src || this->internal_empty() ) - return false; - - reserved_src = &this->internal_pop(); - } - - // Try to get from this sender - msg = reserved_src->try_reserve( v ); - - if (msg == false) { - typename mutex_type::scoped_lock lock(this->my_mutex); - // Relinquish ownership of the edge - register_successor( *reserved_src, *this->my_owner ); - reserved_src = NULL; - } else { - // Retain ownership of the edge - this->add( *reserved_src ); - } - } while ( msg == false ); - - return msg; - } - - bool - try_release( ) { - reserved_src->try_release( ); - reserved_src = NULL; - return true; - } - - bool - try_consume( ) { - reserved_src->try_consume( ); - reserved_src = NULL; - return true; - } - - void reset( ) { - reserved_src = NULL; - predecessor_cache<T,M>::reset( ); - } - - void clear() { - reserved_src = NULL; - predecessor_cache<T,M>::clear(); - } - -private: - predecessor_type *reserved_src; -}; - - -//! An abstract cache of successors -template<typename T, typename M=spin_rw_mutex > -class successor_cache : no_copy { -protected: - - typedef M mutex_type; - mutex_type my_mutex; - - typedef receiver<T> successor_type; - typedef receiver<T>* pointer_type; - typedef sender<T> owner_type; - // TODO revamp: introduce heapified collection of successors for strict priorities - typedef std::list< pointer_type > successors_type; - successors_type my_successors; - - owner_type* my_owner; - -public: - successor_cache( owner_type* owner ) : my_owner(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - virtual ~successor_cache() {} - - void register_successor( successor_type& r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - if( r.priority() != no_priority ) - my_successors.push_front( &r ); - else - my_successors.push_back( &r ); - } - - void remove_successor( successor_type& r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - for ( typename successors_type::iterator i = my_successors.begin(); - i != my_successors.end(); ++i ) { - if ( *i == & r ) { - my_successors.erase(i); - break; - } - } - } - - bool empty() { - typename mutex_type::scoped_lock l(my_mutex, false); - return my_successors.empty(); - } - - void clear() { - my_successors.clear(); - } - - virtual graph_task* try_put_task( const T& t ) = 0; -}; // successor_cache<T> - -//! 
An abstract cache of successors, specialized to continue_msg -template<typename M> -class successor_cache< continue_msg, M > : no_copy { -protected: - - typedef M mutex_type; - mutex_type my_mutex; - - typedef receiver<continue_msg> successor_type; - typedef receiver<continue_msg>* pointer_type; - typedef sender<continue_msg> owner_type; - typedef std::list< pointer_type > successors_type; - successors_type my_successors; - owner_type* my_owner; - -public: - successor_cache( sender<continue_msg>* owner ) : my_owner(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - virtual ~successor_cache() {} - - void register_successor( successor_type& r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - if( r.priority() != no_priority ) - my_successors.push_front( &r ); - else - my_successors.push_back( &r ); - __TBB_ASSERT( my_owner, "Cache of successors must have an owner." ); - if ( r.is_continue_receiver() ) { - r.register_predecessor( *my_owner ); - } - } - - void remove_successor( successor_type& r ) { - typename mutex_type::scoped_lock l(my_mutex, true); - for ( successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { - if ( *i == &r ) { - __TBB_ASSERT(my_owner, "Cache of successors must have an owner."); - // TODO: check if we need to test for continue_receiver before removing from r. - r.remove_predecessor( *my_owner ); - my_successors.erase(i); - break; - } - } - } - - bool empty() { - typename mutex_type::scoped_lock l(my_mutex, false); - return my_successors.empty(); - } - - void clear() { - my_successors.clear(); - } - - virtual graph_task* try_put_task( const continue_msg& t ) = 0; -}; // successor_cache< continue_msg > - -//! A cache of successors that are broadcast to -template<typename T, typename M=spin_rw_mutex> -class broadcast_cache : public successor_cache<T, M> { - typedef successor_cache<T, M> base_type; - typedef M mutex_type; - typedef typename successor_cache<T,M>::successors_type successors_type; - -public: - - broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - // as above, but call try_put_task instead, and return the last task we received (if any) - graph_task* try_put_task( const T &t ) override { - graph_task * last_task = nullptr; - typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); - typename successors_type::iterator i = this->my_successors.begin(); - while ( i != this->my_successors.end() ) { - graph_task *new_task = (*i)->try_put_task(t); - // workaround for icc bug - graph& graph_ref = (*i)->graph_reference(); - last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary - if(new_task) { - ++i; - } - else { // failed - if ( (*i)->register_predecessor(*this->my_owner) ) { - i = this->my_successors.erase(i); - } else { - ++i; - } - } - } - return last_task; - } - - // call try_put_task and return list of received tasks - bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) { - bool is_at_least_one_put_successful = false; - typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); - typename successors_type::iterator i = this->my_successors.begin(); - while ( i != this->my_successors.end() ) { - graph_task * new_task = (*i)->try_put_task(t); - if(new_task) { - ++i; - if(new_task != SUCCESSFULLY_ENQUEUED) { - tasks.push_back(*new_task); - } - is_at_least_one_put_successful = true; - } - else { // 
failed - if ( (*i)->register_predecessor(*this->my_owner) ) { - i = this->my_successors.erase(i); - } else { - ++i; - } - } - } - return is_at_least_one_put_successful; - } -}; - -//! A cache of successors that are put in a round-robin fashion -template<typename T, typename M=spin_rw_mutex > -class round_robin_cache : public successor_cache<T, M> { - typedef successor_cache<T, M> base_type; - typedef size_t size_type; - typedef M mutex_type; - typedef typename successor_cache<T,M>::successors_type successors_type; - -public: - - round_robin_cache( typename base_type::owner_type* owner ): base_type(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - size_type size() { - typename mutex_type::scoped_lock l(this->my_mutex, false); - return this->my_successors.size(); - } - - graph_task* try_put_task( const T &t ) override { - typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); - typename successors_type::iterator i = this->my_successors.begin(); - while ( i != this->my_successors.end() ) { - graph_task* new_task = (*i)->try_put_task(t); - if ( new_task ) { - return new_task; - } else { - if ( (*i)->register_predecessor(*this->my_owner) ) { - i = this->my_successors.erase(i); - } - else { - ++i; - } - } - } - return NULL; - } -}; - -#endif // __TBB__flow_graph_cache_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_cache_impl_H +#define __TBB__flow_graph_cache_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 (in flow_graph.h) + +//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock. +template< typename T, typename M=spin_mutex > +class node_cache { + public: + + typedef size_t size_type; + + bool empty() { + typename mutex_type::scoped_lock lock( my_mutex ); + return internal_empty(); + } + + void add( T &n ) { + typename mutex_type::scoped_lock lock( my_mutex ); + internal_push(n); + } + + void remove( T &n ) { + typename mutex_type::scoped_lock lock( my_mutex ); + for ( size_t i = internal_size(); i != 0; --i ) { + T &s = internal_pop(); + if ( &s == &n ) + break; // only remove one predecessor per request + internal_push(s); + } + } + + void clear() { + while( !my_q.empty()) (void)my_q.pop(); + } + +protected: + + typedef M mutex_type; + mutex_type my_mutex; + std::queue< T * > my_q; + + // Assumes lock is held + inline bool internal_empty( ) { + return my_q.empty(); + } + + // Assumes lock is held + inline size_type internal_size( ) { + return my_q.size(); + } + + // Assumes lock is held + inline void internal_push( T &n ) { + my_q.push(&n); + } + + // Assumes lock is held + inline T &internal_pop() { + T *v = my_q.front(); + my_q.pop(); + return *v; + } + +}; + +//! 
A cache of predecessors that only supports try_get +template< typename T, typename M=spin_mutex > +class predecessor_cache : public node_cache< sender<T>, M > { +public: + typedef M mutex_type; + typedef T output_type; + typedef sender<output_type> predecessor_type; + typedef receiver<output_type> successor_type; + + predecessor_cache( successor_type* owner ) : my_owner( owner ) { + __TBB_ASSERT( my_owner, "predecessor_cache should have an owner." ); + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + bool get_item( output_type& v ) { + + bool msg = false; + + do { + predecessor_type *src; + { + typename mutex_type::scoped_lock lock(this->my_mutex); + if ( this->internal_empty() ) { + break; + } + src = &this->internal_pop(); + } + + // Try to get from this sender + msg = src->try_get( v ); + + if (msg == false) { + // Relinquish ownership of the edge + register_successor(*src, *my_owner); + } else { + // Retain ownership of the edge + this->add(*src); + } + } while ( msg == false ); + return msg; + } + + // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). + void reset() { + for(;;) { + predecessor_type *src; + { + if (this->internal_empty()) break; + src = &this->internal_pop(); + } + register_successor(*src, *my_owner); + } + } + +protected: + successor_type* my_owner; +}; + +//! An cache of predecessors that supports requests and reservations +template< typename T, typename M=spin_mutex > +class reservable_predecessor_cache : public predecessor_cache< T, M > { +public: + typedef M mutex_type; + typedef T output_type; + typedef sender<T> predecessor_type; + typedef receiver<T> successor_type; + + reservable_predecessor_cache( successor_type* owner ) + : predecessor_cache<T,M>(owner), reserved_src(NULL) + { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + bool + try_reserve( output_type &v ) { + bool msg = false; + + do { + { + typename mutex_type::scoped_lock lock(this->my_mutex); + if ( reserved_src || this->internal_empty() ) + return false; + + reserved_src = &this->internal_pop(); + } + + // Try to get from this sender + msg = reserved_src->try_reserve( v ); + + if (msg == false) { + typename mutex_type::scoped_lock lock(this->my_mutex); + // Relinquish ownership of the edge + register_successor( *reserved_src, *this->my_owner ); + reserved_src = NULL; + } else { + // Retain ownership of the edge + this->add( *reserved_src ); + } + } while ( msg == false ); + + return msg; + } + + bool + try_release( ) { + reserved_src->try_release( ); + reserved_src = NULL; + return true; + } + + bool + try_consume( ) { + reserved_src->try_consume( ); + reserved_src = NULL; + return true; + } + + void reset( ) { + reserved_src = NULL; + predecessor_cache<T,M>::reset( ); + } + + void clear() { + reserved_src = NULL; + predecessor_cache<T,M>::clear(); + } + +private: + predecessor_type *reserved_src; +}; + + +//! 
An abstract cache of successors +template<typename T, typename M=spin_rw_mutex > +class successor_cache : no_copy { +protected: + + typedef M mutex_type; + mutex_type my_mutex; + + typedef receiver<T> successor_type; + typedef receiver<T>* pointer_type; + typedef sender<T> owner_type; + // TODO revamp: introduce heapified collection of successors for strict priorities + typedef std::list< pointer_type > successors_type; + successors_type my_successors; + + owner_type* my_owner; + +public: + successor_cache( owner_type* owner ) : my_owner(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + virtual ~successor_cache() {} + + void register_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + if( r.priority() != no_priority ) + my_successors.push_front( &r ); + else + my_successors.push_back( &r ); + } + + void remove_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + for ( typename successors_type::iterator i = my_successors.begin(); + i != my_successors.end(); ++i ) { + if ( *i == & r ) { + my_successors.erase(i); + break; + } + } + } + + bool empty() { + typename mutex_type::scoped_lock l(my_mutex, false); + return my_successors.empty(); + } + + void clear() { + my_successors.clear(); + } + + virtual graph_task* try_put_task( const T& t ) = 0; +}; // successor_cache<T> + +//! An abstract cache of successors, specialized to continue_msg +template<typename M> +class successor_cache< continue_msg, M > : no_copy { +protected: + + typedef M mutex_type; + mutex_type my_mutex; + + typedef receiver<continue_msg> successor_type; + typedef receiver<continue_msg>* pointer_type; + typedef sender<continue_msg> owner_type; + typedef std::list< pointer_type > successors_type; + successors_type my_successors; + owner_type* my_owner; + +public: + successor_cache( sender<continue_msg>* owner ) : my_owner(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + virtual ~successor_cache() {} + + void register_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + if( r.priority() != no_priority ) + my_successors.push_front( &r ); + else + my_successors.push_back( &r ); + __TBB_ASSERT( my_owner, "Cache of successors must have an owner." ); + if ( r.is_continue_receiver() ) { + r.register_predecessor( *my_owner ); + } + } + + void remove_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + for ( successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { + if ( *i == &r ) { + __TBB_ASSERT(my_owner, "Cache of successors must have an owner."); + // TODO: check if we need to test for continue_receiver before removing from r. + r.remove_predecessor( *my_owner ); + my_successors.erase(i); + break; + } + } + } + + bool empty() { + typename mutex_type::scoped_lock l(my_mutex, false); + return my_successors.empty(); + } + + void clear() { + my_successors.clear(); + } + + virtual graph_task* try_put_task( const continue_msg& t ) = 0; +}; // successor_cache< continue_msg > + +//! 
A cache of successors that are broadcast to +template<typename T, typename M=spin_rw_mutex> +class broadcast_cache : public successor_cache<T, M> { + typedef successor_cache<T, M> base_type; + typedef M mutex_type; + typedef typename successor_cache<T,M>::successors_type successors_type; + +public: + + broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + // as above, but call try_put_task instead, and return the last task we received (if any) + graph_task* try_put_task( const T &t ) override { + graph_task * last_task = nullptr; + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task *new_task = (*i)->try_put_task(t); + // workaround for icc bug + graph& graph_ref = (*i)->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary + if(new_task) { + ++i; + } + else { // failed + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } else { + ++i; + } + } + } + return last_task; + } + + // call try_put_task and return list of received tasks + bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) { + bool is_at_least_one_put_successful = false; + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task * new_task = (*i)->try_put_task(t); + if(new_task) { + ++i; + if(new_task != SUCCESSFULLY_ENQUEUED) { + tasks.push_back(*new_task); + } + is_at_least_one_put_successful = true; + } + else { // failed + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } else { + ++i; + } + } + } + return is_at_least_one_put_successful; + } +}; + +//! 
A cache of successors that are put in a round-robin fashion +template<typename T, typename M=spin_rw_mutex > +class round_robin_cache : public successor_cache<T, M> { + typedef successor_cache<T, M> base_type; + typedef size_t size_type; + typedef M mutex_type; + typedef typename successor_cache<T,M>::successors_type successors_type; + +public: + + round_robin_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + size_type size() { + typename mutex_type::scoped_lock l(this->my_mutex, false); + return this->my_successors.size(); + } + + graph_task* try_put_task( const T &t ) override { + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task* new_task = (*i)->try_put_task(t); + if ( new_task ) { + return new_task; + } else { + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } + else { + ++i; + } + } + } + return NULL; + } +}; + +#endif // __TBB__flow_graph_cache_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h index a3d17cfb1c..1c4e6c8917 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h @@ -1,488 +1,488 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_flow_graph_impl_H -#define __TBB_flow_graph_impl_H - -// #include "../config.h" -#include "_task.h" -#include "tbb/task_group.h" -#include "../task_arena.h" -#include "../flow_graph_abstractions.h" - -#include "../concurrent_priority_queue.h" - -#include <list> - -namespace tbb { -namespace detail { - -namespace d1 { - -class graph_task; -static graph_task* const SUCCESSFULLY_ENQUEUED = (graph_task*)-1; -typedef unsigned int node_priority_t; -static const node_priority_t no_priority = node_priority_t(0); - -class graph; -class graph_node; - -template <typename GraphContainerType, typename GraphNodeType> -class graph_iterator { - friend class graph; - friend class graph_node; -public: - typedef size_t size_type; - typedef GraphNodeType value_type; - typedef GraphNodeType* pointer; - typedef GraphNodeType& reference; - typedef const GraphNodeType& const_reference; - typedef std::forward_iterator_tag iterator_category; - - //! Copy constructor - graph_iterator(const graph_iterator& other) : - my_graph(other.my_graph), current_node(other.current_node) - {} - - //! Assignment - graph_iterator& operator=(const graph_iterator& other) { - if (this != &other) { - my_graph = other.my_graph; - current_node = other.current_node; - } - return *this; - } - - //! Dereference - reference operator*() const; - - //! Dereference - pointer operator->() const; - - //! 
Equality - bool operator==(const graph_iterator& other) const { - return ((my_graph == other.my_graph) && (current_node == other.current_node)); - } - -#if !__TBB_CPP20_COMPARISONS_PRESENT - //! Inequality - bool operator!=(const graph_iterator& other) const { return !(operator==(other)); } -#endif - - //! Pre-increment - graph_iterator& operator++() { - internal_forward(); - return *this; - } - - //! Post-increment - graph_iterator operator++(int) { - graph_iterator result = *this; - operator++(); - return result; - } - -private: - // the graph over which we are iterating - GraphContainerType *my_graph; - // pointer into my_graph's my_nodes list - pointer current_node; - - //! Private initializing constructor for begin() and end() iterators - graph_iterator(GraphContainerType *g, bool begin); - void internal_forward(); -}; // class graph_iterator - -// flags to modify the behavior of the graph reset(). Can be combined. -enum reset_flags { - rf_reset_protocol = 0, - rf_reset_bodies = 1 << 0, // delete the current node body, reset to a copy of the initial node body. - rf_clear_edges = 1 << 1 // delete edges -}; - -void activate_graph(graph& g); -void deactivate_graph(graph& g); -bool is_graph_active(graph& g); -graph_task* prioritize_task(graph& g, graph_task& arena_task); -void spawn_in_graph_arena(graph& g, graph_task& arena_task); -void enqueue_in_graph_arena(graph &g, graph_task& arena_task); - -class graph; - -//! Base class for tasks generated by graph nodes. -class graph_task : public task { -public: - graph_task(graph& g, small_object_allocator& allocator - , node_priority_t node_priority = no_priority - ) - : my_graph(g) - , priority(node_priority) - , my_allocator(allocator) - {} - graph& my_graph; // graph instance the task belongs to - // TODO revamp: rename to my_priority - node_priority_t priority; - void destruct_and_deallocate(const execution_data& ed); - task* cancel(execution_data& ed) override; -protected: - void finalize(const execution_data& ed); -private: - // To organize task_list - graph_task* my_next{ nullptr }; - small_object_allocator my_allocator; - // TODO revamp: elaborate internal interfaces to avoid friends declarations - friend class graph_task_list; - friend graph_task* prioritize_task(graph& g, graph_task& gt); -}; - -struct graph_task_comparator { - bool operator()(const graph_task* left, const graph_task* right) { - return left->priority < right->priority; - } -}; - -typedef tbb::concurrent_priority_queue<graph_task*, graph_task_comparator> graph_task_priority_queue_t; - -class priority_task_selector : public task { -public: - priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator) - : my_priority_queue(priority_queue), my_allocator(allocator), my_task() {} - task* execute(execution_data& ed) override { - next_task(); - __TBB_ASSERT(my_task, nullptr); - task* t_next = my_task->execute(ed); - my_allocator.delete_object(this, ed); - return t_next; - } - task* cancel(execution_data& ed) override { - if (!my_task) { - next_task(); - } - __TBB_ASSERT(my_task, nullptr); - task* t_next = my_task->cancel(ed); - my_allocator.delete_object(this, ed); - return t_next; - } -private: - void next_task() { - // TODO revamp: hold functors in priority queue instead of real tasks - bool result = my_priority_queue.try_pop(my_task); - __TBB_ASSERT_EX(result, "Number of critical tasks for scheduler and tasks" - " in graph's priority queue mismatched"); - __TBB_ASSERT(my_task && my_task != SUCCESSFULLY_ENQUEUED, - "Incorrect task 
submitted to graph priority queue"); - __TBB_ASSERT(my_task->priority != no_priority, - "Tasks from graph's priority queue must have priority"); - } - - graph_task_priority_queue_t& my_priority_queue; - small_object_allocator my_allocator; - graph_task* my_task; -}; - -template <typename Receiver, typename Body> class run_and_put_task; -template <typename Body> class run_task; - -//******************************************************************************** -// graph tasks helpers -//******************************************************************************** - -//! The list of graph tasks -class graph_task_list : no_copy { -private: - graph_task* my_first; - graph_task** my_next_ptr; -public: - //! Construct empty list - graph_task_list() : my_first(nullptr), my_next_ptr(&my_first) {} - - //! True if list is empty; false otherwise. - bool empty() const { return !my_first; } - - //! Push task onto back of list. - void push_back(graph_task& task) { - task.my_next = nullptr; - *my_next_ptr = &task; - my_next_ptr = &task.my_next; - } - - //! Pop the front task from the list. - graph_task& pop_front() { - __TBB_ASSERT(!empty(), "attempt to pop item from empty task_list"); - graph_task* result = my_first; - my_first = result->my_next; - if (!my_first) { - my_next_ptr = &my_first; - } - return *result; - } -}; - -//! The graph class -/** This class serves as a handle to the graph */ -class graph : no_copy, public graph_proxy { - friend class graph_node; - - void prepare_task_arena(bool reinit = false) { - if (reinit) { - __TBB_ASSERT(my_task_arena, "task arena is NULL"); - my_task_arena->terminate(); - my_task_arena->initialize(task_arena::attach()); - } - else { - __TBB_ASSERT(my_task_arena == NULL, "task arena is not NULL"); - my_task_arena = new task_arena(task_arena::attach()); - } - if (!my_task_arena->is_active()) // failed to attach - my_task_arena->initialize(); // create a new, default-initialized arena - __TBB_ASSERT(my_task_arena->is_active(), "task arena is not active"); - } - -public: - //! Constructs a graph with isolated task_group_context - graph(); - - //! Constructs a graph with use_this_context as context - explicit graph(task_group_context& use_this_context); - - //! Destroys the graph. - /** Calls wait_for_all, then destroys the root task and context. */ - ~graph(); - - //! Used to register that an external entity may still interact with the graph. - /** The graph will not return from wait_for_all until a matching number of release_wait calls is - made. */ - void reserve_wait() override; - - //! Deregisters an external entity that may have interacted with the graph. - /** The graph will not return from wait_for_all until all the number of reserve_wait calls - matches the number of release_wait calls. */ - void release_wait() override; - - //! Wait until graph is idle and the number of release_wait calls equals to the number of - //! reserve_wait calls. - /** The waiting thread will go off and steal work while it is blocked in the wait_for_all. */ - void wait_for_all() { - cancelled = false; - caught_exception = false; - try_call([this] { - my_task_arena->execute([this] { - wait(my_wait_context, *my_context); - }); - cancelled = my_context->is_group_execution_cancelled(); - }).on_exception([this] { - my_context->reset(); - caught_exception = true; - cancelled = true; - }); - // TODO: the "if" condition below is just a work-around to support the concurrent wait - // mode. The cancellation and exception mechanisms are still broken in this mode. 
- // Consider using task group not to re-implement the same functionality. - if (!(my_context->traits() & task_group_context::concurrent_wait)) { - my_context->reset(); // consistent with behavior in catch() - } - } - -#if TODO_REVAMP -#error Decide on ref_count() presence. - Its only use is in the template<typename T, typename BufferType> void test_resets() -#endif - -#if __TBB_EXTRA_DEBUG - unsigned ref_count() const { return my_wait_context.reference_count(); } -#endif - - - // TODO revamp: consider adding getter for task_group_context. - - // ITERATORS - template<typename C, typename N> - friend class graph_iterator; - - // Graph iterator typedefs - typedef graph_iterator<graph, graph_node> iterator; - typedef graph_iterator<const graph, const graph_node> const_iterator; - - // Graph iterator constructors - //! start iterator - iterator begin(); - //! end iterator - iterator end(); - //! start const iterator - const_iterator begin() const; - //! end const iterator - const_iterator end() const; - //! start const iterator - const_iterator cbegin() const; - //! end const iterator - const_iterator cend() const; - - // thread-unsafe state reset. - void reset(reset_flags f = rf_reset_protocol); - - //! cancels execution of the associated task_group_context - void cancel(); - - //! return status of graph execution - bool is_cancelled() { return cancelled; } - bool exception_thrown() { return caught_exception; } - -private: - wait_context my_wait_context; - task_group_context *my_context; - bool own_context; - bool cancelled; - bool caught_exception; - bool my_is_active; - - graph_node *my_nodes, *my_nodes_last; - - tbb::spin_mutex nodelist_mutex; - void register_node(graph_node *n); - void remove_node(graph_node *n); - - task_arena* my_task_arena; - - graph_task_priority_queue_t my_priority_queue; - - friend void activate_graph(graph& g); - friend void deactivate_graph(graph& g); - friend bool is_graph_active(graph& g); - friend graph_task* prioritize_task(graph& g, graph_task& arena_task); - friend void spawn_in_graph_arena(graph& g, graph_task& arena_task); - friend void enqueue_in_graph_arena(graph &g, graph_task& arena_task); - - friend class task_arena_base; - -}; // class graph - -inline void graph_task::destruct_and_deallocate(const execution_data& ed) { - auto allocator = my_allocator; - // TODO: investigate if direct call of derived destructor gives any benefits. - this->~graph_task(); - allocator.deallocate(this, ed); -} - -inline void graph_task::finalize(const execution_data& ed) { - graph& g = my_graph; - destruct_and_deallocate(ed); - g.release_wait(); -} - -inline task* graph_task::cancel(execution_data& ed) { - finalize(ed); - return nullptr; -} - -//******************************************************************************** -// end of graph tasks helpers -//******************************************************************************** - - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -class get_graph_helper; -#endif - -//! The base of all graph nodes. -class graph_node : no_copy { - friend class graph; - template<typename C, typename N> - friend class graph_iterator; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - friend class get_graph_helper; -#endif - -protected: - graph& my_graph; - graph& graph_reference() const { - // TODO revamp: propagate graph_reference() method to all the reference places. 
- return my_graph; - } - graph_node* next = nullptr; - graph_node* prev = nullptr; -public: - explicit graph_node(graph& g); - - virtual ~graph_node(); - -protected: - // performs the reset on an individual node. - virtual void reset_node(reset_flags f = rf_reset_protocol) = 0; -}; // class graph_node - -inline void activate_graph(graph& g) { - g.my_is_active = true; -} - -inline void deactivate_graph(graph& g) { - g.my_is_active = false; -} - -inline bool is_graph_active(graph& g) { - return g.my_is_active; -} - -inline graph_task* prioritize_task(graph& g, graph_task& gt) { - if( no_priority == gt.priority ) - return &gt; - - //! Non-preemptive priority pattern. The original task is submitted as a work item to the - //! priority queue, and a new critical task is created to take and execute a work item with - //! the highest known priority. The reference counting responsibility is transferred (via - //! allocate_continuation) to the new task. - task* critical_task = gt.my_allocator.new_object<priority_task_selector>(g.my_priority_queue, gt.my_allocator); - __TBB_ASSERT( critical_task, "bad_alloc?" ); - g.my_priority_queue.push(&gt); - using tbb::detail::d1::submit; - submit( *critical_task, *g.my_task_arena, *g.my_context, /*as_critical=*/true ); - return nullptr; -} - -//! Spawns a task inside graph arena -inline void spawn_in_graph_arena(graph& g, graph_task& arena_task) { - if (is_graph_active(g)) { - task* gt = prioritize_task(g, arena_task); - if( !gt ) - return; - - __TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), NULL); - submit( *gt, *g.my_task_arena, *g.my_context -#if __TBB_PREVIEW_CRITICAL_TASKS - , /*as_critical=*/false -#endif - ); - } -} - -// TODO revamp: unify *_in_graph_arena functions - -//! Enqueues a task inside graph arena -inline void enqueue_in_graph_arena(graph &g, graph_task& arena_task) { - if (is_graph_active(g)) { - __TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" ); - - // TODO revamp: decide on the approach that does not postpone critical task - if( task* gt = prioritize_task(g, arena_task) ) - submit( *gt, *g.my_task_arena, *g.my_context, /*as_critical=*/false); - } -} - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_flow_graph_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_flow_graph_impl_H +#define __TBB_flow_graph_impl_H + +// #include "../config.h" +#include "_task.h" +#include "tbb/task_group.h" +#include "../task_arena.h" +#include "../flow_graph_abstractions.h" + +#include "../concurrent_priority_queue.h" + +#include <list> + +namespace tbb { +namespace detail { + +namespace d1 { + +class graph_task; +static graph_task* const SUCCESSFULLY_ENQUEUED = (graph_task*)-1; +typedef unsigned int node_priority_t; +static const node_priority_t no_priority = node_priority_t(0); + +class graph; +class graph_node; + +template <typename GraphContainerType, typename GraphNodeType> +class graph_iterator { + friend class graph; + friend class graph_node; +public: + typedef size_t size_type; + typedef GraphNodeType value_type; + typedef GraphNodeType* pointer; + typedef GraphNodeType& reference; + typedef const GraphNodeType& const_reference; + typedef std::forward_iterator_tag iterator_category; + + //! Copy constructor + graph_iterator(const graph_iterator& other) : + my_graph(other.my_graph), current_node(other.current_node) + {} + + //! Assignment + graph_iterator& operator=(const graph_iterator& other) { + if (this != &other) { + my_graph = other.my_graph; + current_node = other.current_node; + } + return *this; + } + + //! Dereference + reference operator*() const; + + //! Dereference + pointer operator->() const; + + //! Equality + bool operator==(const graph_iterator& other) const { + return ((my_graph == other.my_graph) && (current_node == other.current_node)); + } + +#if !__TBB_CPP20_COMPARISONS_PRESENT + //! Inequality + bool operator!=(const graph_iterator& other) const { return !(operator==(other)); } +#endif + + //! Pre-increment + graph_iterator& operator++() { + internal_forward(); + return *this; + } + + //! Post-increment + graph_iterator operator++(int) { + graph_iterator result = *this; + operator++(); + return result; + } + +private: + // the graph over which we are iterating + GraphContainerType *my_graph; + // pointer into my_graph's my_nodes list + pointer current_node; + + //! Private initializing constructor for begin() and end() iterators + graph_iterator(GraphContainerType *g, bool begin); + void internal_forward(); +}; // class graph_iterator + +// flags to modify the behavior of the graph reset(). Can be combined. +enum reset_flags { + rf_reset_protocol = 0, + rf_reset_bodies = 1 << 0, // delete the current node body, reset to a copy of the initial node body. + rf_clear_edges = 1 << 1 // delete edges +}; + +void activate_graph(graph& g); +void deactivate_graph(graph& g); +bool is_graph_active(graph& g); +graph_task* prioritize_task(graph& g, graph_task& arena_task); +void spawn_in_graph_arena(graph& g, graph_task& arena_task); +void enqueue_in_graph_arena(graph &g, graph_task& arena_task); + +class graph; + +//! Base class for tasks generated by graph nodes. 
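// Illustrative sketch (not part of this header): how a node typically creates and launches such a
// task is only implied here, so the commented lines below piece the protocol together from calls
// that do appear in this file (small_object_allocator::new_object, reserve_wait,
// spawn_in_graph_arena, and the apply_body_task_bypass constructor). The names my_node_type,
// input_type, my_graph, input and my_priority are placeholders, and the exact sequence in the real
// node implementations may differ:
//
//     small_object_allocator allocator{};
//     typedef apply_body_task_bypass<my_node_type, input_type> task_type;
//     graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this, input, my_priority);
//     my_graph.reserve_wait();             // balanced by release_wait() inside graph_task::finalize()
//     spawn_in_graph_arena(my_graph, *t);  // prioritized if needed, then submitted in the graph's arena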
+class graph_task : public task { +public: + graph_task(graph& g, small_object_allocator& allocator + , node_priority_t node_priority = no_priority + ) + : my_graph(g) + , priority(node_priority) + , my_allocator(allocator) + {} + graph& my_graph; // graph instance the task belongs to + // TODO revamp: rename to my_priority + node_priority_t priority; + void destruct_and_deallocate(const execution_data& ed); + task* cancel(execution_data& ed) override; +protected: + void finalize(const execution_data& ed); +private: + // To organize task_list + graph_task* my_next{ nullptr }; + small_object_allocator my_allocator; + // TODO revamp: elaborate internal interfaces to avoid friends declarations + friend class graph_task_list; + friend graph_task* prioritize_task(graph& g, graph_task& gt); +}; + +struct graph_task_comparator { + bool operator()(const graph_task* left, const graph_task* right) { + return left->priority < right->priority; + } +}; + +typedef tbb::concurrent_priority_queue<graph_task*, graph_task_comparator> graph_task_priority_queue_t; + +class priority_task_selector : public task { +public: + priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator) + : my_priority_queue(priority_queue), my_allocator(allocator), my_task() {} + task* execute(execution_data& ed) override { + next_task(); + __TBB_ASSERT(my_task, nullptr); + task* t_next = my_task->execute(ed); + my_allocator.delete_object(this, ed); + return t_next; + } + task* cancel(execution_data& ed) override { + if (!my_task) { + next_task(); + } + __TBB_ASSERT(my_task, nullptr); + task* t_next = my_task->cancel(ed); + my_allocator.delete_object(this, ed); + return t_next; + } +private: + void next_task() { + // TODO revamp: hold functors in priority queue instead of real tasks + bool result = my_priority_queue.try_pop(my_task); + __TBB_ASSERT_EX(result, "Number of critical tasks for scheduler and tasks" + " in graph's priority queue mismatched"); + __TBB_ASSERT(my_task && my_task != SUCCESSFULLY_ENQUEUED, + "Incorrect task submitted to graph priority queue"); + __TBB_ASSERT(my_task->priority != no_priority, + "Tasks from graph's priority queue must have priority"); + } + + graph_task_priority_queue_t& my_priority_queue; + small_object_allocator my_allocator; + graph_task* my_task; +}; + +template <typename Receiver, typename Body> class run_and_put_task; +template <typename Body> class run_task; + +//******************************************************************************** +// graph tasks helpers +//******************************************************************************** + +//! The list of graph tasks +class graph_task_list : no_copy { +private: + graph_task* my_first; + graph_task** my_next_ptr; +public: + //! Construct empty list + graph_task_list() : my_first(nullptr), my_next_ptr(&my_first) {} + + //! True if list is empty; false otherwise. + bool empty() const { return !my_first; } + + //! Push task onto back of list. + void push_back(graph_task& task) { + task.my_next = nullptr; + *my_next_ptr = &task; + my_next_ptr = &task.my_next; + } + + //! Pop the front task from the list. + graph_task& pop_front() { + __TBB_ASSERT(!empty(), "attempt to pop item from empty task_list"); + graph_task* result = my_first; + my_first = result->my_next; + if (!my_first) { + my_next_ptr = &my_first; + } + return *result; + } +}; + +//! 
The graph class +/** This class serves as a handle to the graph */ +class graph : no_copy, public graph_proxy { + friend class graph_node; + + void prepare_task_arena(bool reinit = false) { + if (reinit) { + __TBB_ASSERT(my_task_arena, "task arena is NULL"); + my_task_arena->terminate(); + my_task_arena->initialize(task_arena::attach()); + } + else { + __TBB_ASSERT(my_task_arena == NULL, "task arena is not NULL"); + my_task_arena = new task_arena(task_arena::attach()); + } + if (!my_task_arena->is_active()) // failed to attach + my_task_arena->initialize(); // create a new, default-initialized arena + __TBB_ASSERT(my_task_arena->is_active(), "task arena is not active"); + } + +public: + //! Constructs a graph with isolated task_group_context + graph(); + + //! Constructs a graph with use_this_context as context + explicit graph(task_group_context& use_this_context); + + //! Destroys the graph. + /** Calls wait_for_all, then destroys the root task and context. */ + ~graph(); + + //! Used to register that an external entity may still interact with the graph. + /** The graph will not return from wait_for_all until a matching number of release_wait calls is + made. */ + void reserve_wait() override; + + //! Deregisters an external entity that may have interacted with the graph. + /** The graph will not return from wait_for_all until all the number of reserve_wait calls + matches the number of release_wait calls. */ + void release_wait() override; + + //! Wait until graph is idle and the number of release_wait calls equals to the number of + //! reserve_wait calls. + /** The waiting thread will go off and steal work while it is blocked in the wait_for_all. */ + void wait_for_all() { + cancelled = false; + caught_exception = false; + try_call([this] { + my_task_arena->execute([this] { + wait(my_wait_context, *my_context); + }); + cancelled = my_context->is_group_execution_cancelled(); + }).on_exception([this] { + my_context->reset(); + caught_exception = true; + cancelled = true; + }); + // TODO: the "if" condition below is just a work-around to support the concurrent wait + // mode. The cancellation and exception mechanisms are still broken in this mode. + // Consider using task group not to re-implement the same functionality. + if (!(my_context->traits() & task_group_context::concurrent_wait)) { + my_context->reset(); // consistent with behavior in catch() + } + } + +#if TODO_REVAMP +#error Decide on ref_count() presence. + Its only use is in the template<typename T, typename BufferType> void test_resets() +#endif + +#if __TBB_EXTRA_DEBUG + unsigned ref_count() const { return my_wait_context.reference_count(); } +#endif + + + // TODO revamp: consider adding getter for task_group_context. + + // ITERATORS + template<typename C, typename N> + friend class graph_iterator; + + // Graph iterator typedefs + typedef graph_iterator<graph, graph_node> iterator; + typedef graph_iterator<const graph, const graph_node> const_iterator; + + // Graph iterator constructors + //! start iterator + iterator begin(); + //! end iterator + iterator end(); + //! start const iterator + const_iterator begin() const; + //! end const iterator + const_iterator end() const; + //! start const iterator + const_iterator cbegin() const; + //! end const iterator + const_iterator cend() const; + + // thread-unsafe state reset. + void reset(reset_flags f = rf_reset_protocol); + + //! cancels execution of the associated task_group_context + void cancel(); + + //! 
return status of graph execution + bool is_cancelled() { return cancelled; } + bool exception_thrown() { return caught_exception; } + +private: + wait_context my_wait_context; + task_group_context *my_context; + bool own_context; + bool cancelled; + bool caught_exception; + bool my_is_active; + + graph_node *my_nodes, *my_nodes_last; + + tbb::spin_mutex nodelist_mutex; + void register_node(graph_node *n); + void remove_node(graph_node *n); + + task_arena* my_task_arena; + + graph_task_priority_queue_t my_priority_queue; + + friend void activate_graph(graph& g); + friend void deactivate_graph(graph& g); + friend bool is_graph_active(graph& g); + friend graph_task* prioritize_task(graph& g, graph_task& arena_task); + friend void spawn_in_graph_arena(graph& g, graph_task& arena_task); + friend void enqueue_in_graph_arena(graph &g, graph_task& arena_task); + + friend class task_arena_base; + +}; // class graph + +inline void graph_task::destruct_and_deallocate(const execution_data& ed) { + auto allocator = my_allocator; + // TODO: investigate if direct call of derived destructor gives any benefits. + this->~graph_task(); + allocator.deallocate(this, ed); +} + +inline void graph_task::finalize(const execution_data& ed) { + graph& g = my_graph; + destruct_and_deallocate(ed); + g.release_wait(); +} + +inline task* graph_task::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//******************************************************************************** +// end of graph tasks helpers +//******************************************************************************** + + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +class get_graph_helper; +#endif + +//! The base of all graph nodes. +class graph_node : no_copy { + friend class graph; + template<typename C, typename N> + friend class graph_iterator; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + +protected: + graph& my_graph; + graph& graph_reference() const { + // TODO revamp: propagate graph_reference() method to all the reference places. + return my_graph; + } + graph_node* next = nullptr; + graph_node* prev = nullptr; +public: + explicit graph_node(graph& g); + + virtual ~graph_node(); + +protected: + // performs the reset on an individual node. + virtual void reset_node(reset_flags f = rf_reset_protocol) = 0; +}; // class graph_node + +inline void activate_graph(graph& g) { + g.my_is_active = true; +} + +inline void deactivate_graph(graph& g) { + g.my_is_active = false; +} + +inline bool is_graph_active(graph& g) { + return g.my_is_active; +} + +inline graph_task* prioritize_task(graph& g, graph_task& gt) { + if( no_priority == gt.priority ) + return &gt; + + //! Non-preemptive priority pattern. The original task is submitted as a work item to the + //! priority queue, and a new critical task is created to take and execute a work item with + //! the highest known priority. The reference counting responsibility is transferred (via + //! allocate_continuation) to the new task. + task* critical_task = gt.my_allocator.new_object<priority_task_selector>(g.my_priority_queue, gt.my_allocator); + __TBB_ASSERT( critical_task, "bad_alloc?" ); + g.my_priority_queue.push(&gt); + using tbb::detail::d1::submit; + submit( *critical_task, *g.my_task_arena, *g.my_context, /*as_critical=*/true ); + return nullptr; +} + +//! 
Spawns a task inside graph arena +inline void spawn_in_graph_arena(graph& g, graph_task& arena_task) { + if (is_graph_active(g)) { + task* gt = prioritize_task(g, arena_task); + if( !gt ) + return; + + __TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), NULL); + submit( *gt, *g.my_task_arena, *g.my_context +#if __TBB_PREVIEW_CRITICAL_TASKS + , /*as_critical=*/false +#endif + ); + } +} + +// TODO revamp: unify *_in_graph_arena functions + +//! Enqueues a task inside graph arena +inline void enqueue_in_graph_arena(graph &g, graph_task& arena_task) { + if (is_graph_active(g)) { + __TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" ); + + // TODO revamp: decide on the approach that does not postpone critical task + if( task* gt = prioritize_task(g, arena_task) ) + submit( *gt, *g.my_task_arena, *g.my_context, /*as_critical=*/false); + } +} + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_flow_graph_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h index f4f55a6c7a..d73fe86bfc 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h @@ -1,351 +1,351 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_indexer_impl_H -#define __TBB__flow_graph_indexer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::detail::d1 - -#include "_flow_graph_types_impl.h" - - // Output of the indexer_node is a tbb::flow::tagged_msg, and will be of - // the form tagged_msg<tag, result> - // where the value of tag will indicate which result was put to the - // successor. 
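// Illustrative example (not part of this header): consuming the tagged_msg produced by an
// indexer_node through the public tbb::flow API. The graph, node, and function names below
// (g, idx, sink, consume_indexer_output) are placeholders chosen for this sketch; cast_to and
// tag() are the documented accessors for tagged_msg.
#include <iostream>
#include "oneapi/tbb/flow_graph.h"

void consume_indexer_output() {
    using namespace tbb::flow;
    graph g;
    indexer_node<int, float> idx(g);    // output_type is tagged_msg<size_t, int, float>
    function_node<indexer_node<int, float>::output_type> sink(g, unlimited,
        [](const indexer_node<int, float>::output_type& msg) {
            if (msg.tag() == 0)         // the tag is the index of the input port that fired
                std::cout << "int: " << cast_to<int>(msg) << '\n';
            else
                std::cout << "float: " << cast_to<float>(msg) << '\n';
        });
    make_edge(idx, sink);
    input_port<0>(idx).try_put(42);     // arrives at sink tagged with 0
    input_port<1>(idx).try_put(3.5f);   // arrives at sink tagged with 1
    g.wait_for_all();
}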
- - template<typename IndexerNodeBaseType, typename T, size_t K> - graph_task* do_try_put(const T &v, void *p) { - typename IndexerNodeBaseType::output_type o(K, v); - return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o); - } - - template<typename TupleTypes,int N> - struct indexer_helper { - template<typename IndexerNodeBaseType, typename PortTuple> - static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { - typedef typename std::tuple_element<N-1, TupleTypes>::type T; - graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>; - std::get<N-1>(my_input).set_up(p, indexer_node_put_task, g); - indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p, g); - } - }; - - template<typename TupleTypes> - struct indexer_helper<TupleTypes,1> { - template<typename IndexerNodeBaseType, typename PortTuple> - static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { - typedef typename std::tuple_element<0, TupleTypes>::type T; - graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>; - std::get<0>(my_input).set_up(p, indexer_node_put_task, g); - } - }; - - template<typename T> - class indexer_input_port : public receiver<T> { - private: - void* my_indexer_ptr; - typedef graph_task* (* forward_function_ptr)(T const &, void* ); - forward_function_ptr my_try_put_task; - graph* my_graph; - public: - void set_up(void* p, forward_function_ptr f, graph& g) { - my_indexer_ptr = p; - my_try_put_task = f; - my_graph = &g; - } - - protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task(const T &v) override { - return my_try_put_task(v, my_indexer_ptr); - } - - graph& graph_reference() const override { - return *my_graph; - } - }; - - template<typename InputTuple, typename OutputType, typename StructTypes> - class indexer_node_FE { - public: - static const int N = std::tuple_size<InputTuple>::value; - typedef OutputType output_type; - typedef InputTuple input_type; - - // Some versions of Intel(R) C++ Compiler fail to generate an implicit constructor for the class which has std::tuple as a member. - indexer_node_FE() : my_inputs() {} - - input_type &input_ports() { return my_inputs; } - protected: - input_type my_inputs; - }; - - //! 
indexer_node_base - template<typename InputTuple, typename OutputType, typename StructTypes> - class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>, - public sender<OutputType> { - protected: - using graph_node::my_graph; - public: - static const size_t N = std::tuple_size<InputTuple>::value; - typedef OutputType output_type; - typedef StructTypes tuple_types; - typedef typename sender<output_type>::successor_type successor_type; - typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type; - - private: - // ----------- Aggregator ------------ - enum op_type { reg_succ, rem_succ, try__put_task - }; - typedef indexer_node_base<InputTuple,output_type,StructTypes> class_type; - - class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> { - public: - char type; - union { - output_type const *my_arg; - successor_type *my_succ; - graph_task* bypass_t; - }; - indexer_node_base_operation(const output_type* e, op_type t) : - type(char(t)), my_arg(e) {} - indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), - my_succ(const_cast<successor_type *>(&s)) {} - }; - - typedef aggregating_functor<class_type, indexer_node_base_operation> handler_type; - friend class aggregating_functor<class_type, indexer_node_base_operation>; - aggregator<handler_type, indexer_node_base_operation> my_aggregator; - - void handle_operations(indexer_node_base_operation* op_list) { - indexer_node_base_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - - case reg_succ: - my_successors.register_successor(*(current->my_succ)); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - - case rem_succ: - my_successors.remove_successor(*(current->my_succ)); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case try__put_task: { - current->bypass_t = my_successors.try_put_task(*(current->my_arg)); - current->status.store( SUCCEEDED, std::memory_order_release); // return of try_put_task actual return value - } - break; - } - } - } - // ---------- end aggregator ----------- - public: - indexer_node_base(graph& g) : graph_node(g), input_ports_type(), my_successors(this) { - indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, g); - my_aggregator.initialize_handler(handler_type(this)); - } - - indexer_node_base(const indexer_node_base& other) - : graph_node(other.my_graph), input_ports_type(), sender<output_type>(), my_successors(this) - { - indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, other.my_graph); - my_aggregator.initialize_handler(handler_type(this)); - } - - bool register_successor(successor_type &r) override { - indexer_node_base_operation op_data(r, reg_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - bool remove_successor( successor_type &r) override { - indexer_node_base_operation op_data(r, rem_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - graph_task* try_put_task(output_type const *v) { // not a virtual method in this class - indexer_node_base_operation op_data(v, try__put_task); - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - - protected: - void reset_node(reset_flags f) override { - if(f & rf_clear_edges) { - my_successors.clear(); - } - } - - private: - broadcast_cache<output_type, null_rw_mutex> my_successors; - }; 
//indexer_node_base - - - template<int N, typename InputTuple> struct input_types; - - template<typename InputTuple> - struct input_types<1, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef tagged_msg<size_t, first_type > type; - }; - - template<typename InputTuple> - struct input_types<2, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef tagged_msg<size_t, first_type, second_type> type; - }; - - template<typename InputTuple> - struct input_types<3, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef tagged_msg<size_t, first_type, second_type, third_type> type; - }; - - template<typename InputTuple> - struct input_types<4, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type> type; - }; - - template<typename InputTuple> - struct input_types<5, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type> type; - }; - - template<typename InputTuple> - struct input_types<6, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type> type; - }; - - template<typename InputTuple> - struct input_types<7, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef typename std::tuple_element<6, InputTuple>::type seventh_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type, - seventh_type> type; - }; - - - template<typename InputTuple> - struct input_types<8, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, 
InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef typename std::tuple_element<6, InputTuple>::type seventh_type; - typedef typename std::tuple_element<7, InputTuple>::type eighth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type, - seventh_type, eighth_type> type; - }; - - - template<typename InputTuple> - struct input_types<9, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef typename std::tuple_element<6, InputTuple>::type seventh_type; - typedef typename std::tuple_element<7, InputTuple>::type eighth_type; - typedef typename std::tuple_element<8, InputTuple>::type nineth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type, - seventh_type, eighth_type, nineth_type> type; - }; - - template<typename InputTuple> - struct input_types<10, InputTuple> { - typedef typename std::tuple_element<0, InputTuple>::type first_type; - typedef typename std::tuple_element<1, InputTuple>::type second_type; - typedef typename std::tuple_element<2, InputTuple>::type third_type; - typedef typename std::tuple_element<3, InputTuple>::type fourth_type; - typedef typename std::tuple_element<4, InputTuple>::type fifth_type; - typedef typename std::tuple_element<5, InputTuple>::type sixth_type; - typedef typename std::tuple_element<6, InputTuple>::type seventh_type; - typedef typename std::tuple_element<7, InputTuple>::type eighth_type; - typedef typename std::tuple_element<8, InputTuple>::type nineth_type; - typedef typename std::tuple_element<9, InputTuple>::type tenth_type; - typedef tagged_msg<size_t, first_type, second_type, third_type, - fourth_type, fifth_type, sixth_type, - seventh_type, eighth_type, nineth_type, - tenth_type> type; - }; - - // type generators - template<typename OutputTuple> - struct indexer_types : public input_types<std::tuple_size<OutputTuple>::value, OutputTuple> { - static const int N = std::tuple_size<OutputTuple>::value; - typedef typename input_types<N, OutputTuple>::type output_type; - typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type; - typedef indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type; - typedef indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type; - }; - - template<class OutputTuple> - class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type { - public: - typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type; - typedef OutputTuple tuple_types; - typedef typename indexer_types<OutputTuple>::output_type output_type; - private: - typedef typename indexer_types<OutputTuple>::indexer_base_type base_type; - public: - unfolded_indexer_node(graph& g) : base_type(g) {} - unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {} - }; - -#endif /* __TBB__flow_graph_indexer_impl_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 
2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_indexer_impl_H +#define __TBB__flow_graph_indexer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 + +#include "_flow_graph_types_impl.h" + + // Output of the indexer_node is a tbb::flow::tagged_msg, and will be of + // the form tagged_msg<tag, result> + // where the value of tag will indicate which result was put to the + // successor. + + template<typename IndexerNodeBaseType, typename T, size_t K> + graph_task* do_try_put(const T &v, void *p) { + typename IndexerNodeBaseType::output_type o(K, v); + return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o); + } + + template<typename TupleTypes,int N> + struct indexer_helper { + template<typename IndexerNodeBaseType, typename PortTuple> + static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { + typedef typename std::tuple_element<N-1, TupleTypes>::type T; + graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>; + std::get<N-1>(my_input).set_up(p, indexer_node_put_task, g); + indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p, g); + } + }; + + template<typename TupleTypes> + struct indexer_helper<TupleTypes,1> { + template<typename IndexerNodeBaseType, typename PortTuple> + static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { + typedef typename std::tuple_element<0, TupleTypes>::type T; + graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>; + std::get<0>(my_input).set_up(p, indexer_node_put_task, g); + } + }; + + template<typename T> + class indexer_input_port : public receiver<T> { + private: + void* my_indexer_ptr; + typedef graph_task* (* forward_function_ptr)(T const &, void* ); + forward_function_ptr my_try_put_task; + graph* my_graph; + public: + void set_up(void* p, forward_function_ptr f, graph& g) { + my_indexer_ptr = p; + my_try_put_task = f; + my_graph = &g; + } + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const T &v) override { + return my_try_put_task(v, my_indexer_ptr); + } + + graph& graph_reference() const override { + return *my_graph; + } + }; + + template<typename InputTuple, typename OutputType, typename StructTypes> + class indexer_node_FE { + public: + static const int N = std::tuple_size<InputTuple>::value; + typedef OutputType output_type; + typedef InputTuple input_type; + + // Some versions of Intel(R) C++ Compiler fail to generate an implicit constructor for the class which has std::tuple as a member. 
+ indexer_node_FE() : my_inputs() {} + + input_type &input_ports() { return my_inputs; } + protected: + input_type my_inputs; + }; + + //! indexer_node_base + template<typename InputTuple, typename OutputType, typename StructTypes> + class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>, + public sender<OutputType> { + protected: + using graph_node::my_graph; + public: + static const size_t N = std::tuple_size<InputTuple>::value; + typedef OutputType output_type; + typedef StructTypes tuple_types; + typedef typename sender<output_type>::successor_type successor_type; + typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_succ, rem_succ, try__put_task + }; + typedef indexer_node_base<InputTuple,output_type,StructTypes> class_type; + + class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> { + public: + char type; + union { + output_type const *my_arg; + successor_type *my_succ; + graph_task* bypass_t; + }; + indexer_node_base_operation(const output_type* e, op_type t) : + type(char(t)), my_arg(e) {} + indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), + my_succ(const_cast<successor_type *>(&s)) {} + }; + + typedef aggregating_functor<class_type, indexer_node_base_operation> handler_type; + friend class aggregating_functor<class_type, indexer_node_base_operation>; + aggregator<handler_type, indexer_node_base_operation> my_aggregator; + + void handle_operations(indexer_node_base_operation* op_list) { + indexer_node_base_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + + case reg_succ: + my_successors.register_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + + case rem_succ: + my_successors.remove_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case try__put_task: { + current->bypass_t = my_successors.try_put_task(*(current->my_arg)); + current->status.store( SUCCEEDED, std::memory_order_release); // return of try_put_task actual return value + } + break; + } + } + } + // ---------- end aggregator ----------- + public: + indexer_node_base(graph& g) : graph_node(g), input_ports_type(), my_successors(this) { + indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, g); + my_aggregator.initialize_handler(handler_type(this)); + } + + indexer_node_base(const indexer_node_base& other) + : graph_node(other.my_graph), input_ports_type(), sender<output_type>(), my_successors(this) + { + indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, other.my_graph); + my_aggregator.initialize_handler(handler_type(this)); + } + + bool register_successor(successor_type &r) override { + indexer_node_base_operation op_data(r, reg_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool remove_successor( successor_type &r) override { + indexer_node_base_operation op_data(r, rem_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + graph_task* try_put_task(output_type const *v) { // not a virtual method in this class + indexer_node_base_operation op_data(v, try__put_task); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + protected: + void reset_node(reset_flags f) override { + if(f & 
rf_clear_edges) { + my_successors.clear(); + } + } + + private: + broadcast_cache<output_type, null_rw_mutex> my_successors; + }; //indexer_node_base + + + template<int N, typename InputTuple> struct input_types; + + template<typename InputTuple> + struct input_types<1, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef tagged_msg<size_t, first_type > type; + }; + + template<typename InputTuple> + struct input_types<2, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef tagged_msg<size_t, first_type, second_type> type; + }; + + template<typename InputTuple> + struct input_types<3, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef tagged_msg<size_t, first_type, second_type, third_type> type; + }; + + template<typename InputTuple> + struct input_types<4, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type> type; + }; + + template<typename InputTuple> + struct input_types<5, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type> type; + }; + + template<typename InputTuple> + struct input_types<6, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type> type; + }; + + template<typename InputTuple> + struct input_types<7, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type> type; + }; + + + template<typename InputTuple> + struct input_types<8, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, 
InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type> type; + }; + + + template<typename InputTuple> + struct input_types<9, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef typename std::tuple_element<8, InputTuple>::type nineth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type, nineth_type> type; + }; + + template<typename InputTuple> + struct input_types<10, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef typename std::tuple_element<8, InputTuple>::type nineth_type; + typedef typename std::tuple_element<9, InputTuple>::type tenth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type, nineth_type, + tenth_type> type; + }; + + // type generators + template<typename OutputTuple> + struct indexer_types : public input_types<std::tuple_size<OutputTuple>::value, OutputTuple> { + static const int N = std::tuple_size<OutputTuple>::value; + typedef typename input_types<N, OutputTuple>::type output_type; + typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type; + typedef indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type; + typedef indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type; + }; + + template<class OutputTuple> + class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type { + public: + typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type; + typedef OutputTuple tuple_types; + typedef typename indexer_types<OutputTuple>::output_type output_type; + private: + typedef typename indexer_types<OutputTuple>::indexer_base_type base_type; + public: + unfolded_indexer_node(graph& g) : base_type(g) {} + unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {} + }; 
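The indexer front end above tags each incoming value with the index of the port that received it, so a single successor can accept heterogeneous messages and dispatch on the tag. A minimal usage sketch through the public tbb::flow interface follows; the graph topology and the printed strings are illustrative and not part of this header.

#include <iostream>
#include "oneapi/tbb/flow_graph.h"

int main() {
    using namespace tbb::flow;
    typedef indexer_node<int, float> indexer_t;

    graph g;
    indexer_t idx(g);
    // The successor sees tagged_msg<size_t, int, float>; tag() is the port index.
    function_node<indexer_t::output_type> sink(g, unlimited,
        [](const indexer_t::output_type& msg) {
            if (msg.tag() == 0)
                std::cout << "int: "   << cast_to<int>(msg)   << '\n';
            else
                std::cout << "float: " << cast_to<float>(msg) << '\n';
        });

    make_edge(idx, sink);
    input_port<0>(idx).try_put(42);      // arrives at the successor with tag 0
    input_port<1>(idx).try_put(3.5f);    // arrives at the successor with tag 1
    g.wait_for_all();
    return 0;
}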
+ +#endif /* __TBB__flow_graph_indexer_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h index 4466bf4180..84ec74c7ae 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h @@ -1,279 +1,279 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_item_buffer_impl_H -#define __TBB__flow_graph_item_buffer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "_aligned_space.h" - -// in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h) - -//! Expandable buffer of items. The possible operations are push, pop, -//* tests for empty and so forth. No mutual exclusion is built in. -//* objects are constructed into and explicitly-destroyed. get_my_item gives -// a read-only reference to the item in the buffer. set_my_item may be called -// with either an empty or occupied slot. - -template <typename T, typename A=cache_aligned_allocator<T> > -class item_buffer { -public: - typedef T item_type; - enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 }; -protected: - typedef size_t size_type; - typedef std::pair<item_type, buffer_item_state> aligned_space_item; - typedef aligned_space<aligned_space_item> buffer_item_type; - typedef typename allocator_traits<A>::template rebind_alloc<buffer_item_type> allocator_type; - buffer_item_type *my_array; - size_type my_array_size; - static const size_type initial_buffer_size = 4; - size_type my_head; - size_type my_tail; - - bool buffer_empty() const { return my_head == my_tail; } - - aligned_space_item &item(size_type i) { - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value),NULL); - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); - return *my_array[i & (my_array_size - 1) ].begin(); - } - - const aligned_space_item &item(size_type i) const { - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), NULL); - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); - return *my_array[i & (my_array_size-1)].begin(); - } - - bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } -#if TBB_USE_ASSERT - bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } -#endif - - // object management in buffer - const item_type &get_my_item(size_t i) const { - __TBB_ASSERT(my_item_valid(i),"attempt to get invalid item"); - item_type* itm = const_cast<item_type*>(reinterpret_cast<const item_type*>(&item(i).first)); - return *itm; - } - - // may 
be called with an empty slot or a slot that has already been constructed into. - void set_my_item(size_t i, const item_type &o) { - if(item(i).second != no_item) { - destroy_item(i); - } - new(&(item(i).first)) item_type(o); - item(i).second = has_item; - } - - // destructively-fetch an object from the buffer - void fetch_item(size_t i, item_type &o) { - __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); - o = get_my_item(i); // could have std::move assign semantics - destroy_item(i); - } - - // move an existing item from one slot to another. The moved-to slot must be unoccupied, - // the moved-from slot must exist and not be reserved. The after, from will be empty, - // to will be occupied but not reserved - void move_item(size_t to, size_t from) { - __TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot"); - __TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot"); - set_my_item(to, get_my_item(from)); // could have std::move semantics - destroy_item(from); - - } - - // put an item in an empty slot. Return true if successful, else false - bool place_item(size_t here, const item_type &me) { -#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES - if(my_item_valid(here)) return false; -#endif - set_my_item(here, me); - return true; - } - - // could be implemented with std::move semantics - void swap_items(size_t i, size_t j) { - __TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)"); - item_type temp = get_my_item(i); - set_my_item(i, get_my_item(j)); - set_my_item(j, temp); - } - - void destroy_item(size_type i) { - __TBB_ASSERT(my_item_valid(i), "destruction of invalid item"); - item(i).first.~item_type(); - item(i).second = no_item; - } - - // returns the front element - const item_type& front() const - { - __TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item"); - return get_my_item(my_head); - } - - // returns the back element - const item_type& back() const - { - __TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch head non-item"); - return get_my_item(my_tail - 1); - } - - // following methods are for reservation of the front of a buffer. - void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; } - void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; } - - void destroy_front() { destroy_item(my_head); ++my_head; } - void destroy_back() { destroy_item(my_tail-1); --my_tail; } - - // we have to be able to test against a new tail value without changing my_tail - // grow_array doesn't work if we change my_tail when the old array is too small - size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; } - size_type capacity() { return my_array_size; } - // sequencer_node does not use this method, so we don't - // need a version that passes in the new_tail value. - bool buffer_full() { return size() >= capacity(); } - - //! Grows the internal array. - void grow_my_array( size_t minimum_size ) { - // test that we haven't made the structure inconsistent. - __TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity"); - size_type new_size = my_array_size ? 
2*my_array_size : initial_buffer_size; - while( new_size<minimum_size ) - new_size*=2; - - buffer_item_type* new_array = allocator_type().allocate(new_size); - - // initialize validity to "no" - for( size_type i=0; i<new_size; ++i ) { new_array[i].begin()->second = no_item; } - - for( size_type i=my_head; i<my_tail; ++i) { - if(my_item_valid(i)) { // sequencer_node may have empty slots - // placement-new copy-construct; could be std::move - char *new_space = (char *)&(new_array[i&(new_size-1)].begin()->first); - (void)new(new_space) item_type(get_my_item(i)); - new_array[i&(new_size-1)].begin()->second = item(i).second; - } - } - - clean_up_buffer(/*reset_pointers*/false); - - my_array = new_array; - my_array_size = new_size; - } - - bool push_back(item_type &v) { - if(buffer_full()) { - grow_my_array(size() + 1); - } - set_my_item(my_tail, v); - ++my_tail; - return true; - } - - bool pop_back(item_type &v) { - if (!my_item_valid(my_tail-1)) { - return false; - } - v = this->back(); - destroy_back(); - return true; - } - - bool pop_front(item_type &v) { - if(!my_item_valid(my_head)) { - return false; - } - v = this->front(); - destroy_front(); - return true; - } - - // This is used both for reset and for grow_my_array. In the case of grow_my_array - // we want to retain the values of the head and tail. - void clean_up_buffer(bool reset_pointers) { - if (my_array) { - for( size_type i=my_head; i<my_tail; ++i ) { - if(my_item_valid(i)) - destroy_item(i); - } - allocator_type().deallocate(my_array,my_array_size); - } - my_array = NULL; - if(reset_pointers) { - my_head = my_tail = my_array_size = 0; - } - } - -public: - //! Constructor - item_buffer( ) : my_array(NULL), my_array_size(0), - my_head(0), my_tail(0) { - grow_my_array(initial_buffer_size); - } - - ~item_buffer() { - clean_up_buffer(/*reset_pointers*/true); - } - - void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); } - -}; - -//! item_buffer with reservable front-end. NOTE: if reserving, do not -//* complete operation with pop_front(); use consume_front(). -//* No synchronization built-in. -template<typename T, typename A=cache_aligned_allocator<T> > -class reservable_item_buffer : public item_buffer<T, A> { -protected: - using item_buffer<T, A>::my_item_valid; - using item_buffer<T, A>::my_head; - -public: - reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {} - void reset() {my_reserved = false; item_buffer<T,A>::reset(); } -protected: - - bool reserve_front(T &v) { - if(my_reserved || !my_item_valid(this->my_head)) return false; - my_reserved = true; - // reserving the head - v = this->front(); - this->reserve_item(this->my_head); - return true; - } - - void consume_front() { - __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item"); - this->destroy_front(); - my_reserved = false; - } - - void release_front() { - __TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item"); - this->release_item(this->my_head); - my_reserved = false; - } - - bool my_reserved; -}; - -#endif // __TBB__flow_graph_item_buffer_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_item_buffer_impl_H +#define __TBB__flow_graph_item_buffer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_aligned_space.h" + +// in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h) + +//! Expandable buffer of items. The possible operations are push, pop, +//* tests for empty and so forth. No mutual exclusion is built in. +//* objects are constructed into and explicitly-destroyed. get_my_item gives +// a read-only reference to the item in the buffer. set_my_item may be called +// with either an empty or occupied slot. + +template <typename T, typename A=cache_aligned_allocator<T> > +class item_buffer { +public: + typedef T item_type; + enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 }; +protected: + typedef size_t size_type; + typedef std::pair<item_type, buffer_item_state> aligned_space_item; + typedef aligned_space<aligned_space_item> buffer_item_type; + typedef typename allocator_traits<A>::template rebind_alloc<buffer_item_type> allocator_type; + buffer_item_type *my_array; + size_type my_array_size; + static const size_type initial_buffer_size = 4; + size_type my_head; + size_type my_tail; + + bool buffer_empty() const { return my_head == my_tail; } + + aligned_space_item &item(size_type i) { + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value),NULL); + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); + return *my_array[i & (my_array_size - 1) ].begin(); + } + + const aligned_space_item &item(size_type i) const { + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), NULL); + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); + return *my_array[i & (my_array_size-1)].begin(); + } + + bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } +#if TBB_USE_ASSERT + bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } +#endif + + // object management in buffer + const item_type &get_my_item(size_t i) const { + __TBB_ASSERT(my_item_valid(i),"attempt to get invalid item"); + item_type* itm = const_cast<item_type*>(reinterpret_cast<const item_type*>(&item(i).first)); + return *itm; + } + + // may be called with an empty slot or a slot that has already been constructed into. + void set_my_item(size_t i, const item_type &o) { + if(item(i).second != no_item) { + destroy_item(i); + } + new(&(item(i).first)) item_type(o); + item(i).second = has_item; + } + + // destructively-fetch an object from the buffer + void fetch_item(size_t i, item_type &o) { + __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); + o = get_my_item(i); // could have std::move assign semantics + destroy_item(i); + } + + // move an existing item from one slot to another. 
The moved-to slot must be unoccupied, + // the moved-from slot must exist and not be reserved. The after, from will be empty, + // to will be occupied but not reserved + void move_item(size_t to, size_t from) { + __TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot"); + __TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot"); + set_my_item(to, get_my_item(from)); // could have std::move semantics + destroy_item(from); + + } + + // put an item in an empty slot. Return true if successful, else false + bool place_item(size_t here, const item_type &me) { +#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES + if(my_item_valid(here)) return false; +#endif + set_my_item(here, me); + return true; + } + + // could be implemented with std::move semantics + void swap_items(size_t i, size_t j) { + __TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)"); + item_type temp = get_my_item(i); + set_my_item(i, get_my_item(j)); + set_my_item(j, temp); + } + + void destroy_item(size_type i) { + __TBB_ASSERT(my_item_valid(i), "destruction of invalid item"); + item(i).first.~item_type(); + item(i).second = no_item; + } + + // returns the front element + const item_type& front() const + { + __TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item"); + return get_my_item(my_head); + } + + // returns the back element + const item_type& back() const + { + __TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch head non-item"); + return get_my_item(my_tail - 1); + } + + // following methods are for reservation of the front of a buffer. + void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; } + void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; } + + void destroy_front() { destroy_item(my_head); ++my_head; } + void destroy_back() { destroy_item(my_tail-1); --my_tail; } + + // we have to be able to test against a new tail value without changing my_tail + // grow_array doesn't work if we change my_tail when the old array is too small + size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; } + size_type capacity() { return my_array_size; } + // sequencer_node does not use this method, so we don't + // need a version that passes in the new_tail value. + bool buffer_full() { return size() >= capacity(); } + + //! Grows the internal array. + void grow_my_array( size_t minimum_size ) { + // test that we haven't made the structure inconsistent. + __TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity"); + size_type new_size = my_array_size ? 
2*my_array_size : initial_buffer_size; + while( new_size<minimum_size ) + new_size*=2; + + buffer_item_type* new_array = allocator_type().allocate(new_size); + + // initialize validity to "no" + for( size_type i=0; i<new_size; ++i ) { new_array[i].begin()->second = no_item; } + + for( size_type i=my_head; i<my_tail; ++i) { + if(my_item_valid(i)) { // sequencer_node may have empty slots + // placement-new copy-construct; could be std::move + char *new_space = (char *)&(new_array[i&(new_size-1)].begin()->first); + (void)new(new_space) item_type(get_my_item(i)); + new_array[i&(new_size-1)].begin()->second = item(i).second; + } + } + + clean_up_buffer(/*reset_pointers*/false); + + my_array = new_array; + my_array_size = new_size; + } + + bool push_back(item_type &v) { + if(buffer_full()) { + grow_my_array(size() + 1); + } + set_my_item(my_tail, v); + ++my_tail; + return true; + } + + bool pop_back(item_type &v) { + if (!my_item_valid(my_tail-1)) { + return false; + } + v = this->back(); + destroy_back(); + return true; + } + + bool pop_front(item_type &v) { + if(!my_item_valid(my_head)) { + return false; + } + v = this->front(); + destroy_front(); + return true; + } + + // This is used both for reset and for grow_my_array. In the case of grow_my_array + // we want to retain the values of the head and tail. + void clean_up_buffer(bool reset_pointers) { + if (my_array) { + for( size_type i=my_head; i<my_tail; ++i ) { + if(my_item_valid(i)) + destroy_item(i); + } + allocator_type().deallocate(my_array,my_array_size); + } + my_array = NULL; + if(reset_pointers) { + my_head = my_tail = my_array_size = 0; + } + } + +public: + //! Constructor + item_buffer( ) : my_array(NULL), my_array_size(0), + my_head(0), my_tail(0) { + grow_my_array(initial_buffer_size); + } + + ~item_buffer() { + clean_up_buffer(/*reset_pointers*/true); + } + + void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); } + +}; + +//! item_buffer with reservable front-end. NOTE: if reserving, do not +//* complete operation with pop_front(); use consume_front(). +//* No synchronization built-in. 
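The buffer above never shrinks its index space: my_head and my_tail grow monotonically, and grow_my_array keeps my_array_size a power of two so that item(i) can map an index to a slot with a mask instead of a modulo. A small standalone sketch of that invariant (the names below are invented for the example, not taken from the header):

#include <cassert>
#include <cstddef>

// Maps a monotonically growing index to a slot of a power-of-two-sized ring.
static std::size_t slot(std::size_t index, std::size_t capacity_pow2) {
    return index & (capacity_pow2 - 1);   // equivalent to index % capacity_pow2
}

int main() {
    std::size_t capacity = 4;             // matches initial_buffer_size above
    while (capacity < 10) capacity *= 2;  // same doubling policy as grow_my_array
    for (std::size_t i = 0; i < 100; ++i)
        assert(slot(i, capacity) == i % capacity);
    return 0;
}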
+template<typename T, typename A=cache_aligned_allocator<T> > +class reservable_item_buffer : public item_buffer<T, A> { +protected: + using item_buffer<T, A>::my_item_valid; + using item_buffer<T, A>::my_head; + +public: + reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {} + void reset() {my_reserved = false; item_buffer<T,A>::reset(); } +protected: + + bool reserve_front(T &v) { + if(my_reserved || !my_item_valid(this->my_head)) return false; + my_reserved = true; + // reserving the head + v = this->front(); + this->reserve_item(this->my_head); + return true; + } + + void consume_front() { + __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item"); + this->destroy_front(); + my_reserved = false; + } + + void release_front() { + __TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item"); + this->release_item(this->my_head); + my_reserved = false; + } + + bool my_reserved; +}; + +#endif // __TBB__flow_graph_item_buffer_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h index 98b357cdbc..c8316edd56 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h @@ -1,1706 +1,1706 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_join_impl_H -#define __TBB__flow_graph_join_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included into namespace tbb::detail::d1 - - struct forwarding_base : no_assign { - forwarding_base(graph &g) : graph_ref(g) {} - virtual ~forwarding_base() {} - graph& graph_ref; - }; - - struct queueing_forwarding_base : forwarding_base { - using forwarding_base::forwarding_base; - // decrement_port_count may create a forwarding task. If we cannot handle the task - // ourselves, ask decrement_port_count to deal with it. - virtual graph_task* decrement_port_count(bool handle_task) = 0; - }; - - struct reserving_forwarding_base : forwarding_base { - using forwarding_base::forwarding_base; - // decrement_port_count may create a forwarding task. If we cannot handle the task - // ourselves, ask decrement_port_count to deal with it. - virtual graph_task* decrement_port_count() = 0; - virtual void increment_port_count() = 0; - }; - - // specialization that lets us keep a copy of the current_key for building results. - // KeyType can be a reference type. 
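reservable_item_buffer above, and the reserving join ports that follow, share a two-phase protocol: reserve an item from every input, then either consume all reservations on success or release the partial set on failure so the items remain available for a later attempt. A self-contained sketch of that pattern under simplified assumptions (toy_port and try_join are invented for illustration; they are not TBB types):

#include <array>
#include <cstddef>
#include <optional>

struct toy_port {
    std::optional<int> item;
    bool reserved = false;

    bool reserve(int& v) {                  // phase one: peek at the item and lock it
        if (reserved || !item) return false;
        reserved = true;
        v = *item;
        return true;
    }
    void consume() { item.reset(); reserved = false; }   // commit: item leaves the port
    void release() { reserved = false; }                  // roll back: item stays available
};

template <std::size_t N>
bool try_join(std::array<toy_port, N>& ports, std::array<int, N>& out) {
    std::size_t taken = 0;
    for (; taken < N; ++taken)
        if (!ports[taken].reserve(out[taken])) break;
    if (taken == N) {                        // every input reserved: complete the join
        for (auto& p : ports) p.consume();
        return true;
    }
    for (std::size_t i = 0; i < taken; ++i)  // partial success: undo what was reserved
        ports[i].release();
    return false;
}

int main() {
    std::array<toy_port, 2> ports;
    ports[0].item = 1;                       // second port empty, so the join must roll back
    std::array<int, 2> out{};
    bool joined = try_join(ports, out);      // false; ports[0] is released, not consumed
    return joined ? 1 : 0;
}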
- template<typename KeyType> - struct matching_forwarding_base : public forwarding_base { - typedef typename std::decay<KeyType>::type current_key_type; - matching_forwarding_base(graph &g) : forwarding_base(g) { } - virtual graph_task* increment_key_count(current_key_type const & /*t*/) = 0; - current_key_type current_key; // so ports can refer to FE's desired items - }; - - template< int N > - struct join_helper { - - template< typename TupleType, typename PortType > - static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { - std::get<N-1>( my_input ).set_join_node_pointer(port); - join_helper<N-1>::set_join_node_pointer( my_input, port ); - } - template< typename TupleType > - static inline void consume_reservations( TupleType &my_input ) { - std::get<N-1>( my_input ).consume(); - join_helper<N-1>::consume_reservations( my_input ); - } - - template< typename TupleType > - static inline void release_my_reservation( TupleType &my_input ) { - std::get<N-1>( my_input ).release(); - } - - template <typename TupleType> - static inline void release_reservations( TupleType &my_input) { - join_helper<N-1>::release_reservations(my_input); - release_my_reservation(my_input); - } - - template< typename InputTuple, typename OutputTuple > - static inline bool reserve( InputTuple &my_input, OutputTuple &out) { - if ( !std::get<N-1>( my_input ).reserve( std::get<N-1>( out ) ) ) return false; - if ( !join_helper<N-1>::reserve( my_input, out ) ) { - release_my_reservation( my_input ); - return false; - } - return true; - } - - template<typename InputTuple, typename OutputTuple> - static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { - bool res = std::get<N-1>(my_input).get_item(std::get<N-1>(out) ); // may fail - return join_helper<N-1>::get_my_item(my_input, out) && res; // do get on other inputs before returning - } - - template<typename InputTuple, typename OutputTuple> - static inline bool get_items(InputTuple &my_input, OutputTuple &out) { - return get_my_item(my_input, out); - } - - template<typename InputTuple> - static inline void reset_my_port(InputTuple &my_input) { - join_helper<N-1>::reset_my_port(my_input); - std::get<N-1>(my_input).reset_port(); - } - - template<typename InputTuple> - static inline void reset_ports(InputTuple& my_input) { - reset_my_port(my_input); - } - - template<typename InputTuple, typename KeyFuncTuple> - static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { - std::get<N-1>(my_input).set_my_key_func(std::get<N-1>(my_key_funcs)); - std::get<N-1>(my_key_funcs) = nullptr; - join_helper<N-1>::set_key_functors(my_input, my_key_funcs); - } - - template< typename KeyFuncTuple> - static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { - __TBB_ASSERT( - std::get<N-1>(other_inputs).get_my_key_func(), - "key matching join node should not be instantiated without functors." 
- ); - std::get<N-1>(my_inputs).set_my_key_func(std::get<N-1>(other_inputs).get_my_key_func()->clone()); - join_helper<N-1>::copy_key_functors(my_inputs, other_inputs); - } - - template<typename InputTuple> - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - join_helper<N-1>::reset_inputs(my_input, f); - std::get<N-1>(my_input).reset_receiver(f); - } - }; // join_helper<N> - - template< > - struct join_helper<1> { - - template< typename TupleType, typename PortType > - static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { - std::get<0>( my_input ).set_join_node_pointer(port); - } - - template< typename TupleType > - static inline void consume_reservations( TupleType &my_input ) { - std::get<0>( my_input ).consume(); - } - - template< typename TupleType > - static inline void release_my_reservation( TupleType &my_input ) { - std::get<0>( my_input ).release(); - } - - template<typename TupleType> - static inline void release_reservations( TupleType &my_input) { - release_my_reservation(my_input); - } - - template< typename InputTuple, typename OutputTuple > - static inline bool reserve( InputTuple &my_input, OutputTuple &out) { - return std::get<0>( my_input ).reserve( std::get<0>( out ) ); - } - - template<typename InputTuple, typename OutputTuple> - static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { - return std::get<0>(my_input).get_item(std::get<0>(out)); - } - - template<typename InputTuple, typename OutputTuple> - static inline bool get_items(InputTuple &my_input, OutputTuple &out) { - return get_my_item(my_input, out); - } - - template<typename InputTuple> - static inline void reset_my_port(InputTuple &my_input) { - std::get<0>(my_input).reset_port(); - } - - template<typename InputTuple> - static inline void reset_ports(InputTuple& my_input) { - reset_my_port(my_input); - } - - template<typename InputTuple, typename KeyFuncTuple> - static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { - std::get<0>(my_input).set_my_key_func(std::get<0>(my_key_funcs)); - std::get<0>(my_key_funcs) = nullptr; - } - - template< typename KeyFuncTuple> - static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { - __TBB_ASSERT( - std::get<0>(other_inputs).get_my_key_func(), - "key matching join node should not be instantiated without functors." - ); - std::get<0>(my_inputs).set_my_key_func(std::get<0>(other_inputs).get_my_key_func()->clone()); - } - template<typename InputTuple> - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - std::get<0>(my_input).reset_receiver(f); - } - }; // join_helper<1> - - //! 
The two-phase join port - template< typename T > - class reserving_port : public receiver<T> { - public: - typedef T input_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - - private: - // ----------- Aggregator ------------ - enum op_type { reg_pred, rem_pred, res_item, rel_res, con_res - }; - typedef reserving_port<T> class_type; - - class reserving_port_operation : public aggregated_operation<reserving_port_operation> { - public: - char type; - union { - T *my_arg; - predecessor_type *my_pred; - }; - reserving_port_operation(const T& e, op_type t) : - type(char(t)), my_arg(const_cast<T*>(&e)) {} - reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), - my_pred(const_cast<predecessor_type *>(&s)) {} - reserving_port_operation(op_type t) : type(char(t)) {} - }; - - typedef aggregating_functor<class_type, reserving_port_operation> handler_type; - friend class aggregating_functor<class_type, reserving_port_operation>; - aggregator<handler_type, reserving_port_operation> my_aggregator; - - void handle_operations(reserving_port_operation* op_list) { - reserving_port_operation *current; - bool was_missing_predecessors = false; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case reg_pred: - was_missing_predecessors = my_predecessors.empty(); - my_predecessors.add(*(current->my_pred)); - if ( was_missing_predecessors ) { - (void) my_join->decrement_port_count(); // may try to forward - } - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case rem_pred: - if ( !my_predecessors.empty() ) { - my_predecessors.remove(*(current->my_pred)); - if ( my_predecessors.empty() ) // was the last predecessor - my_join->increment_port_count(); - } - // TODO: consider returning failure if there were no predecessors to remove - current->status.store( SUCCEEDED, std::memory_order_release ); - break; - case res_item: - if ( reserved ) { - current->status.store( FAILED, std::memory_order_release); - } - else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) { - reserved = true; - current->status.store( SUCCEEDED, std::memory_order_release); - } else { - if ( my_predecessors.empty() ) { - my_join->increment_port_count(); - } - current->status.store( FAILED, std::memory_order_release); - } - break; - case rel_res: - reserved = false; - my_predecessors.try_release( ); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case con_res: - reserved = false; - my_predecessors.try_consume( ); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - } - } - } - - protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task( const T & ) override { - return nullptr; - } - - graph& graph_reference() const override { - return my_join->graph_ref; - } - - public: - - //! Constructor - reserving_port() : my_join(nullptr), my_predecessors(this), reserved(false) { - my_aggregator.initialize_handler(handler_type(this)); - } - - // copy constructor - reserving_port(const reserving_port& /* other */) = delete; - - void set_join_node_pointer(reserving_forwarding_base *join) { - my_join = join; - } - - //! 
Add a predecessor - bool register_predecessor( predecessor_type &src ) override { - reserving_port_operation op_data(src, reg_pred); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Remove a predecessor - bool remove_predecessor( predecessor_type &src ) override { - reserving_port_operation op_data(src, rem_pred); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Reserve an item from the port - bool reserve( T &v ) { - reserving_port_operation op_data(v, res_item); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - //! Release the port - void release( ) { - reserving_port_operation op_data(rel_res); - my_aggregator.execute(&op_data); - } - - //! Complete use of the port - void consume( ) { - reserving_port_operation op_data(con_res); - my_aggregator.execute(&op_data); - } - - void reset_receiver( reset_flags f) { - if(f & rf_clear_edges) my_predecessors.clear(); - else - my_predecessors.reset(); - reserved = false; - __TBB_ASSERT(!(f&rf_clear_edges) || my_predecessors.empty(), "port edges not removed"); - } - - private: -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - friend class get_graph_helper; -#endif - - reserving_forwarding_base *my_join; - reservable_predecessor_cache< T, null_mutex > my_predecessors; - bool reserved; - }; // reserving_port - - //! queueing join_port - template<typename T> - class queueing_port : public receiver<T>, public item_buffer<T> { - public: - typedef T input_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef queueing_port<T> class_type; - - // ----------- Aggregator ------------ - private: - enum op_type { get__item, res_port, try__put_task - }; - - class queueing_port_operation : public aggregated_operation<queueing_port_operation> { - public: - char type; - T my_val; - T* my_arg; - graph_task* bypass_t; - // constructor for value parameter - queueing_port_operation(const T& e, op_type t) : - type(char(t)), my_val(e) - , bypass_t(nullptr) - {} - // constructor for pointer parameter - queueing_port_operation(const T* p, op_type t) : - type(char(t)), my_arg(const_cast<T*>(p)) - , bypass_t(nullptr) - {} - // constructor with no parameter - queueing_port_operation(op_type t) : type(char(t)) - , bypass_t(nullptr) - {} - }; - - typedef aggregating_functor<class_type, queueing_port_operation> handler_type; - friend class aggregating_functor<class_type, queueing_port_operation>; - aggregator<handler_type, queueing_port_operation> my_aggregator; - - void handle_operations(queueing_port_operation* op_list) { - queueing_port_operation *current; - bool was_empty; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case try__put_task: { - graph_task* rtask = nullptr; - was_empty = this->buffer_empty(); - this->push_back(current->my_val); - if (was_empty) rtask = my_join->decrement_port_count(false); - else - rtask = SUCCESSFULLY_ENQUEUED; - current->bypass_t = rtask; - current->status.store( SUCCEEDED, std::memory_order_release); - } - break; - case get__item: - if(!this->buffer_empty()) { - *(current->my_arg) = this->front(); - current->status.store( SUCCEEDED, std::memory_order_release); - } - else { - current->status.store( FAILED, std::memory_order_release); - } - break; - case res_port: - __TBB_ASSERT(this->my_item_valid(this->my_head), "No item to reset"); - this->destroy_front(); - if(this->my_item_valid(this->my_head)) { - (void)my_join->decrement_port_count(true); - } - 
current->status.store( SUCCEEDED, std::memory_order_release); - break; - } - } - } - // ------------ End Aggregator --------------- - - protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task(const T &v) override { - queueing_port_operation op_data(v, try__put_task); - my_aggregator.execute(&op_data); - __TBB_ASSERT(op_data.status == SUCCEEDED || !op_data.bypass_t, "inconsistent return from aggregator"); - if(!op_data.bypass_t) return SUCCESSFULLY_ENQUEUED; - return op_data.bypass_t; - } - - graph& graph_reference() const override { - return my_join->graph_ref; - } - - public: - - //! Constructor - queueing_port() : item_buffer<T>() { - my_join = nullptr; - my_aggregator.initialize_handler(handler_type(this)); - } - - //! copy constructor - queueing_port(const queueing_port& /* other */) = delete; - - //! record parent for tallying available items - void set_join_node_pointer(queueing_forwarding_base *join) { - my_join = join; - } - - bool get_item( T &v ) { - queueing_port_operation op_data(&v, get__item); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - // reset_port is called when item is accepted by successor, but - // is initiated by join_node. - void reset_port() { - queueing_port_operation op_data(res_port); - my_aggregator.execute(&op_data); - return; - } - - void reset_receiver(reset_flags) { - item_buffer<T>::reset(); - } - - private: -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - friend class get_graph_helper; -#endif - - queueing_forwarding_base *my_join; - }; // queueing_port - -#include "_flow_graph_tagged_buffer_impl.h" - - template<typename K> - struct count_element { - K my_key; - size_t my_value; - }; - - // method to access the key in the counting table - // the ref has already been removed from K - template< typename K > - struct key_to_count_functor { - typedef count_element<K> table_item_type; - const K& operator()(const table_item_type& v) { return v.my_key; } - }; - - // the ports can have only one template parameter. 
We wrap the types needed in - // a traits type - template< class TraitsType > - class key_matching_port : - public receiver<typename TraitsType::T>, - public hash_buffer< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK, - typename TraitsType::KHash > { - public: - typedef TraitsType traits; - typedef key_matching_port<traits> class_type; - typedef typename TraitsType::T input_type; - typedef typename TraitsType::K key_type; - typedef typename std::decay<key_type>::type noref_key_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename TraitsType::TtoK type_to_key_func_type; - typedef typename TraitsType::KHash hash_compare_type; - typedef hash_buffer< key_type, input_type, type_to_key_func_type, hash_compare_type > buffer_type; - - private: -// ----------- Aggregator ------------ - private: - enum op_type { try__put, get__item, res_port - }; - - class key_matching_port_operation : public aggregated_operation<key_matching_port_operation> { - public: - char type; - input_type my_val; - input_type *my_arg; - // constructor for value parameter - key_matching_port_operation(const input_type& e, op_type t) : - type(char(t)), my_val(e) {} - // constructor for pointer parameter - key_matching_port_operation(const input_type* p, op_type t) : - type(char(t)), my_arg(const_cast<input_type*>(p)) {} - // constructor with no parameter - key_matching_port_operation(op_type t) : type(char(t)) {} - }; - - typedef aggregating_functor<class_type, key_matching_port_operation> handler_type; - friend class aggregating_functor<class_type, key_matching_port_operation>; - aggregator<handler_type, key_matching_port_operation> my_aggregator; - - void handle_operations(key_matching_port_operation* op_list) { - key_matching_port_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case try__put: { - bool was_inserted = this->insert_with_key(current->my_val); - // return failure if a duplicate insertion occurs - current->status.store( was_inserted ? 
SUCCEEDED : FAILED, std::memory_order_release); - } - break; - case get__item: - // use current_key from FE for item - if(!this->find_with_key(my_join->current_key, *(current->my_arg))) { - __TBB_ASSERT(false, "Failed to find item corresponding to current_key."); - } - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case res_port: - // use current_key from FE for item - this->delete_with_key(my_join->current_key); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - } - } - } -// ------------ End Aggregator --------------- - protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task(const input_type& v) override { - key_matching_port_operation op_data(v, try__put); - graph_task* rtask = nullptr; - my_aggregator.execute(&op_data); - if(op_data.status == SUCCEEDED) { - rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn - // rtask has to reflect the return status of the try_put - if(!rtask) rtask = SUCCESSFULLY_ENQUEUED; - } - return rtask; - } - - graph& graph_reference() const override { - return my_join->graph_ref; - } - - public: - - key_matching_port() : receiver<input_type>(), buffer_type() { - my_join = nullptr; - my_aggregator.initialize_handler(handler_type(this)); - } - - // copy constructor - key_matching_port(const key_matching_port& /*other*/) = delete; -#if __INTEL_COMPILER <= 2021 - // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited - // class while the parent class has the virtual keyword for the destrocutor. - virtual -#endif - ~key_matching_port() { } - - void set_join_node_pointer(forwarding_base *join) { - my_join = dynamic_cast<matching_forwarding_base<key_type>*>(join); - } - - void set_my_key_func(type_to_key_func_type *f) { this->set_key_func(f); } - - type_to_key_func_type* get_my_key_func() { return this->get_key_func(); } - - bool get_item( input_type &v ) { - // aggregator uses current_key from FE for Key - key_matching_port_operation op_data(&v, get__item); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - // reset_port is called when item is accepted by successor, but - // is initiated by join_node. - void reset_port() { - key_matching_port_operation op_data(res_port); - my_aggregator.execute(&op_data); - return; - } - - void reset_receiver(reset_flags ) { - buffer_type::reset(); - } - - private: - // my_join forwarding base used to count number of inputs that - // received key. - matching_forwarding_base<key_type> *my_join; - }; // key_matching_port - - using namespace graph_policy_namespace; - - template<typename JP, typename InputTuple, typename OutputTuple> - class join_node_base; - - //! 
join_node_FE : implements input port policy - template<typename JP, typename InputTuple, typename OutputTuple> - class join_node_FE; - - template<typename InputTuple, typename OutputTuple> - class join_node_FE<reserving, InputTuple, OutputTuple> : public reserving_forwarding_base { - public: - static const int N = std::tuple_size<OutputTuple>::value; - typedef OutputTuple output_type; - typedef InputTuple input_type; - typedef join_node_base<reserving, InputTuple, OutputTuple> base_node_type; // for forwarding - - join_node_FE(graph &g) : reserving_forwarding_base(g), my_node(nullptr) { - ports_with_no_inputs = N; - join_helper<N>::set_join_node_pointer(my_inputs, this); - } - - join_node_FE(const join_node_FE& other) : reserving_forwarding_base((other.reserving_forwarding_base::graph_ref)), my_node(nullptr) { - ports_with_no_inputs = N; - join_helper<N>::set_join_node_pointer(my_inputs, this); - } - - void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } - - void increment_port_count() override { - ++ports_with_no_inputs; - } - - // if all input_ports have predecessors, spawn forward to try and consume tuples - graph_task* decrement_port_count() override { - if(ports_with_no_inputs.fetch_sub(1) == 1) { - if(is_graph_active(this->graph_ref)) { - small_object_allocator allocator{}; - typedef forward_task_bypass<base_node_type> task_type; - graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); - graph_ref.reserve_wait(); - spawn_in_graph_arena(this->graph_ref, *t); - } - } - return nullptr; - } - - input_type &input_ports() { return my_inputs; } - - protected: - - void reset( reset_flags f) { - // called outside of parallel contexts - ports_with_no_inputs = N; - join_helper<N>::reset_inputs(my_inputs, f); - } - - // all methods on input ports should be called under mutual exclusion from join_node_base. - - bool tuple_build_may_succeed() { - return !ports_with_no_inputs; - } - - bool try_to_make_tuple(output_type &out) { - if(ports_with_no_inputs) return false; - return join_helper<N>::reserve(my_inputs, out); - } - - void tuple_accepted() { - join_helper<N>::consume_reservations(my_inputs); - } - void tuple_rejected() { - join_helper<N>::release_reservations(my_inputs); - } - - input_type my_inputs; - base_node_type *my_node; - std::atomic<std::size_t> ports_with_no_inputs; - }; // join_node_FE<reserving, ... 
> - - template<typename InputTuple, typename OutputTuple> - class join_node_FE<queueing, InputTuple, OutputTuple> : public queueing_forwarding_base { - public: - static const int N = std::tuple_size<OutputTuple>::value; - typedef OutputTuple output_type; - typedef InputTuple input_type; - typedef join_node_base<queueing, InputTuple, OutputTuple> base_node_type; // for forwarding - - join_node_FE(graph &g) : queueing_forwarding_base(g), my_node(nullptr) { - ports_with_no_items = N; - join_helper<N>::set_join_node_pointer(my_inputs, this); - } - - join_node_FE(const join_node_FE& other) : queueing_forwarding_base((other.queueing_forwarding_base::graph_ref)), my_node(nullptr) { - ports_with_no_items = N; - join_helper<N>::set_join_node_pointer(my_inputs, this); - } - - // needed for forwarding - void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } - - void reset_port_count() { - ports_with_no_items = N; - } - - // if all input_ports have items, spawn forward to try and consume tuples - graph_task* decrement_port_count(bool handle_task) override - { - if(ports_with_no_items.fetch_sub(1) == 1) { - if(is_graph_active(this->graph_ref)) { - small_object_allocator allocator{}; - typedef forward_task_bypass<base_node_type> task_type; - graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); - graph_ref.reserve_wait(); - if( !handle_task ) - return t; - spawn_in_graph_arena(this->graph_ref, *t); - } - } - return nullptr; - } - - input_type &input_ports() { return my_inputs; } - - protected: - - void reset( reset_flags f) { - reset_port_count(); - join_helper<N>::reset_inputs(my_inputs, f ); - } - - // all methods on input ports should be called under mutual exclusion from join_node_base. - - bool tuple_build_may_succeed() { - return !ports_with_no_items; - } - - bool try_to_make_tuple(output_type &out) { - if(ports_with_no_items) return false; - return join_helper<N>::get_items(my_inputs, out); - } - - void tuple_accepted() { - reset_port_count(); - join_helper<N>::reset_ports(my_inputs); - } - void tuple_rejected() { - // nothing to do. - } - - input_type my_inputs; - base_node_type *my_node; - std::atomic<std::size_t> ports_with_no_items; - }; // join_node_FE<queueing, ...> - - // key_matching join front-end. - template<typename InputTuple, typename OutputTuple, typename K, typename KHash> - class join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> : public matching_forwarding_base<K>, - // buffer of key value counts - public hash_buffer< // typedefed below to key_to_count_buffer_type - typename std::decay<K>::type&, // force ref type on K - count_element<typename std::decay<K>::type>, - type_to_key_function_body< - count_element<typename std::decay<K>::type>, - typename std::decay<K>::type& >, - KHash >, - // buffer of output items - public item_buffer<OutputTuple> { - public: - static const int N = std::tuple_size<OutputTuple>::value; - typedef OutputTuple output_type; - typedef InputTuple input_type; - typedef K key_type; - typedef typename std::decay<key_type>::type unref_key_type; - typedef KHash key_hash_compare; - // must use K without ref. - typedef count_element<unref_key_type> count_element_type; - // method that lets us refer to the key of this type. 
- typedef key_to_count_functor<unref_key_type> key_to_count_func; - typedef type_to_key_function_body< count_element_type, unref_key_type&> TtoK_function_body_type; - typedef type_to_key_function_body_leaf<count_element_type, unref_key_type&, key_to_count_func> TtoK_function_body_leaf_type; - // this is the type of the special table that keeps track of the number of discrete - // elements corresponding to each key that we've seen. - typedef hash_buffer< unref_key_type&, count_element_type, TtoK_function_body_type, key_hash_compare > - key_to_count_buffer_type; - typedef item_buffer<output_type> output_buffer_type; - typedef join_node_base<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> base_node_type; // for forwarding - typedef matching_forwarding_base<key_type> forwarding_base_type; - -// ----------- Aggregator ------------ - // the aggregator is only needed to serialize the access to the hash table. - // and the output_buffer_type base class - private: - enum op_type { res_count, inc_count, may_succeed, try_make }; - typedef join_node_FE<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> class_type; - - class key_matching_FE_operation : public aggregated_operation<key_matching_FE_operation> { - public: - char type; - unref_key_type my_val; - output_type* my_output; - graph_task* bypass_t; - // constructor for value parameter - key_matching_FE_operation(const unref_key_type& e , op_type t) : type(char(t)), my_val(e), - my_output(nullptr), bypass_t(nullptr) {} - key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(nullptr) {} - // constructor with no parameter - key_matching_FE_operation(op_type t) : type(char(t)), my_output(nullptr), bypass_t(nullptr) {} - }; - - typedef aggregating_functor<class_type, key_matching_FE_operation> handler_type; - friend class aggregating_functor<class_type, key_matching_FE_operation>; - aggregator<handler_type, key_matching_FE_operation> my_aggregator; - - // called from aggregator, so serialized - // returns a task pointer if the a task would have been enqueued but we asked that - // it be returned. Otherwise returns nullptr. 
- graph_task* fill_output_buffer(unref_key_type &t) { - output_type l_out; - graph_task* rtask = nullptr; - bool do_fwd = this->buffer_empty() && is_graph_active(this->graph_ref); - this->current_key = t; - this->delete_with_key(this->current_key); // remove the key - if(join_helper<N>::get_items(my_inputs, l_out)) { // <== call back - this->push_back(l_out); - if(do_fwd) { // we enqueue if receiving an item from predecessor, not if successor asks for item - small_object_allocator allocator{}; - typedef forward_task_bypass<base_node_type> task_type; - rtask = allocator.new_object<task_type>(this->graph_ref, allocator, *my_node); - this->graph_ref.reserve_wait(); - do_fwd = false; - } - // retire the input values - join_helper<N>::reset_ports(my_inputs); // <== call back - } - else { - __TBB_ASSERT(false, "should have had something to push"); - } - return rtask; - } - - void handle_operations(key_matching_FE_operation* op_list) { - key_matching_FE_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case res_count: // called from BE - { - this->destroy_front(); - current->status.store( SUCCEEDED, std::memory_order_release); - } - break; - case inc_count: { // called from input ports - count_element_type *p = 0; - unref_key_type &t = current->my_val; - if(!(this->find_ref_with_key(t,p))) { - count_element_type ev; - ev.my_key = t; - ev.my_value = 0; - this->insert_with_key(ev); - bool found = this->find_ref_with_key(t, p); - __TBB_ASSERT_EX(found, "should find key after inserting it"); - } - if(++(p->my_value) == size_t(N)) { - current->bypass_t = fill_output_buffer(t); - } - } - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case may_succeed: // called from BE - current->status.store( this->buffer_empty() ? FAILED : SUCCEEDED, std::memory_order_release); - break; - case try_make: // called from BE - if(this->buffer_empty()) { - current->status.store( FAILED, std::memory_order_release); - } - else { - *(current->my_output) = this->front(); - current->status.store( SUCCEEDED, std::memory_order_release); - } - break; - } - } - } -// ------------ End Aggregator --------------- - - public: - template<typename FunctionTuple> - join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(nullptr) { - join_helper<N>::set_join_node_pointer(my_inputs, this); - join_helper<N>::set_key_functors(my_inputs, TtoK_funcs); - my_aggregator.initialize_handler(handler_type(this)); - TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); - this->set_key_func(cfb); - } - - join_node_FE(const join_node_FE& other) : forwarding_base_type((other.forwarding_base_type::graph_ref)), key_to_count_buffer_type(), - output_buffer_type() { - my_node = nullptr; - join_helper<N>::set_join_node_pointer(my_inputs, this); - join_helper<N>::copy_key_functors(my_inputs, const_cast<input_type &>(other.my_inputs)); - my_aggregator.initialize_handler(handler_type(this)); - TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); - this->set_key_func(cfb); - } - - // needed for forwarding - void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } - - void reset_port_count() { // called from BE - key_matching_FE_operation op_data(res_count); - my_aggregator.execute(&op_data); - return; - } - - // if all input_ports have items, spawn forward to try and consume tuples - // return a task if we are asked and did create one. 
- graph_task *increment_key_count(unref_key_type const & t) override { // called from input_ports - key_matching_FE_operation op_data(t, inc_count); - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - - input_type &input_ports() { return my_inputs; } - - protected: - - void reset( reset_flags f ) { - // called outside of parallel contexts - join_helper<N>::reset_inputs(my_inputs, f); - - key_to_count_buffer_type::reset(); - output_buffer_type::reset(); - } - - // all methods on input ports should be called under mutual exclusion from join_node_base. - - bool tuple_build_may_succeed() { // called from back-end - key_matching_FE_operation op_data(may_succeed); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - // cannot lock while calling back to input_ports. current_key will only be set - // and reset under the aggregator, so it will remain consistent. - bool try_to_make_tuple(output_type &out) { - key_matching_FE_operation op_data(&out,try_make); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - void tuple_accepted() { - reset_port_count(); // reset current_key after ports reset. - } - - void tuple_rejected() { - // nothing to do. - } - - input_type my_inputs; // input ports - base_node_type *my_node; - }; // join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> - - //! join_node_base - template<typename JP, typename InputTuple, typename OutputTuple> - class join_node_base : public graph_node, public join_node_FE<JP, InputTuple, OutputTuple>, - public sender<OutputTuple> { - protected: - using graph_node::my_graph; - public: - typedef OutputTuple output_type; - - typedef typename sender<output_type>::successor_type successor_type; - typedef join_node_FE<JP, InputTuple, OutputTuple> input_ports_type; - using input_ports_type::tuple_build_may_succeed; - using input_ports_type::try_to_make_tuple; - using input_ports_type::tuple_accepted; - using input_ports_type::tuple_rejected; - - private: - // ----------- Aggregator ------------ - enum op_type { reg_succ, rem_succ, try__get, do_fwrd, do_fwrd_bypass - }; - typedef join_node_base<JP,InputTuple,OutputTuple> class_type; - - class join_node_base_operation : public aggregated_operation<join_node_base_operation> { - public: - char type; - union { - output_type *my_arg; - successor_type *my_succ; - }; - graph_task* bypass_t; - join_node_base_operation(const output_type& e, op_type t) : type(char(t)), - my_arg(const_cast<output_type*>(&e)), bypass_t(nullptr) {} - join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), - my_succ(const_cast<successor_type *>(&s)), bypass_t(nullptr) {} - join_node_base_operation(op_type t) : type(char(t)), bypass_t(nullptr) {} - }; - - typedef aggregating_functor<class_type, join_node_base_operation> handler_type; - friend class aggregating_functor<class_type, join_node_base_operation>; - bool forwarder_busy; - aggregator<handler_type, join_node_base_operation> my_aggregator; - - void handle_operations(join_node_base_operation* op_list) { - join_node_base_operation *current; - while(op_list) { - current = op_list; - op_list = op_list->next; - switch(current->type) { - case reg_succ: { - my_successors.register_successor(*(current->my_succ)); - if(tuple_build_may_succeed() && !forwarder_busy && is_graph_active(my_graph)) { - small_object_allocator allocator{}; - typedef forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> > task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, 
*this); - my_graph.reserve_wait(); - spawn_in_graph_arena(my_graph, *t); - forwarder_busy = true; - } - current->status.store( SUCCEEDED, std::memory_order_release); - } - break; - case rem_succ: - my_successors.remove_successor(*(current->my_succ)); - current->status.store( SUCCEEDED, std::memory_order_release); - break; - case try__get: - if(tuple_build_may_succeed()) { - if(try_to_make_tuple(*(current->my_arg))) { - tuple_accepted(); - current->status.store( SUCCEEDED, std::memory_order_release); - } - else current->status.store( FAILED, std::memory_order_release); - } - else current->status.store( FAILED, std::memory_order_release); - break; - case do_fwrd_bypass: { - bool build_succeeded; - graph_task *last_task = nullptr; - output_type out; - // forwarding must be exclusive, because try_to_make_tuple and tuple_accepted - // are separate locked methods in the FE. We could conceivably fetch the front - // of the FE queue, then be swapped out, have someone else consume the FE's - // object, then come back, forward, and then try to remove it from the queue - // again. Without reservation of the FE, the methods accessing it must be locked. - // We could remember the keys of the objects we forwarded, and then remove - // them from the input ports after forwarding is complete? - if(tuple_build_may_succeed()) { // checks output queue of FE - do { - build_succeeded = try_to_make_tuple(out); // fetch front_end of queue - if(build_succeeded) { - graph_task *new_task = my_successors.try_put_task(out); - last_task = combine_tasks(my_graph, last_task, new_task); - if(new_task) { - tuple_accepted(); - } - else { - tuple_rejected(); - build_succeeded = false; - } - } - } while(build_succeeded); - } - current->bypass_t = last_task; - current->status.store( SUCCEEDED, std::memory_order_release); - forwarder_busy = false; - } - break; - } - } - } - // ---------- end aggregator ----------- - public: - join_node_base(graph &g) - : graph_node(g), input_ports_type(g), forwarder_busy(false), my_successors(this) - { - input_ports_type::set_my_node(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - join_node_base(const join_node_base& other) : - graph_node(other.graph_node::my_graph), input_ports_type(other), - sender<OutputTuple>(), forwarder_busy(false), my_successors(this) - { - input_ports_type::set_my_node(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - template<typename FunctionTuple> - join_node_base(graph &g, FunctionTuple f) - : graph_node(g), input_ports_type(g, f), forwarder_busy(false), my_successors(this) - { - input_ports_type::set_my_node(this); - my_aggregator.initialize_handler(handler_type(this)); - } - - bool register_successor(successor_type &r) override { - join_node_base_operation op_data(r, reg_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - bool remove_successor( successor_type &r) override { - join_node_base_operation op_data(r, rem_succ); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - bool try_get( output_type &v) override { - join_node_base_operation op_data(v, try__get); - my_aggregator.execute(&op_data); - return op_data.status == SUCCEEDED; - } - - protected: - void reset_node(reset_flags f) override { - input_ports_type::reset(f); - if(f & rf_clear_edges) my_successors.clear(); - } - - private: - broadcast_cache<output_type, null_rw_mutex> my_successors; - - friend class forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> >; - graph_task 
*forward_task() { - join_node_base_operation op_data(do_fwrd_bypass); - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - - }; // join_node_base - - // join base class type generator - template<int N, template<class> class PT, typename OutputTuple, typename JP> - struct join_base { - typedef join_node_base<JP, typename wrap_tuple_elements<N,PT,OutputTuple>::type, OutputTuple> type; - }; - - template<int N, typename OutputTuple, typename K, typename KHash> - struct join_base<N, key_matching_port, OutputTuple, key_matching<K,KHash> > { - typedef key_matching<K, KHash> key_traits_type; - typedef K key_type; - typedef KHash key_hash_compare; - typedef join_node_base< key_traits_type, - // ports type - typename wrap_key_tuple_elements<N,key_matching_port,key_traits_type,OutputTuple>::type, - OutputTuple > type; - }; - - //! unfolded_join_node : passes input_ports_type to join_node_base. We build the input port type - // using tuple_element. The class PT is the port type (reserving_port, queueing_port, key_matching_port) - // and should match the typename. - - template<int N, template<class> class PT, typename OutputTuple, typename JP> - class unfolded_join_node : public join_base<N,PT,OutputTuple,JP>::type { - public: - typedef typename wrap_tuple_elements<N, PT, OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<JP, input_ports_type, output_type > base_type; - public: - unfolded_join_node(graph &g) : base_type(g) {} - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; - -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - template <typename K, typename T> - struct key_from_message_body { - K operator()(const T& t) const { - return key_from_message<K>(t); - } - }; - // Adds const to reference type - template <typename K, typename T> - struct key_from_message_body<K&,T> { - const K& operator()(const T& t) const { - return key_from_message<const K&>(t); - } - }; -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - // key_matching unfolded_join_node. This must be a separate specialization because the constructors - // differ. 
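  (For orientation, these key_matching specializations are what back the public
  tbb::flow::join_node when it is instantiated with a key_matching policy: the extra
  constructors accept one key-extractor body per input port. A minimal usage sketch
  follows; the struct names, variable names, and key values are illustrative only and
  are not taken from this file.)

  #include "oneapi/tbb/flow_graph.h"
  #include <string>
  #include <tuple>

  struct order   { int id; std::string item; };
  struct payment { int id; double amount; };

  int main() {
      using namespace tbb::flow;
      graph g;

      // One key-extractor body per input port; this is why the key_matching
      // specializations need constructors that the other join policies lack.
      join_node< std::tuple<order, payment>, key_matching<int> > j(
          g,
          [](const order& o)   { return o.id; },
          [](const payment& p) { return p.id; });

      // Feed each port through the input_port<N>() helper declared in this header.
      input_port<0>(j).try_put(order{42, "book"});
      input_port<1>(j).try_put(payment{42, 9.99});
      g.wait_for_all();

      // Both ports have seen key 42, so a joined tuple can be retrieved.
      std::tuple<order, payment> joined;
      return j.try_get(joined) ? 0 : 1;
  }
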
- - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<2,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<2,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - public: - typedef typename wrap_key_tuple_elements<2,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef std::tuple< f0_p, f1_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1> - unfolded_join_node(graph &g, Body0 body0, Body1 body1) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 2, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; - - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<3,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<3,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - public: - typedef typename wrap_key_tuple_elements<3,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef std::tuple< f0_p, f1_p, f2_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 3, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node 
&other) : base_type(other) {} - }; - - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<4,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<4,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - public: - typedef typename wrap_key_tuple_elements<4,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 4, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; - - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<5,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<5,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - public: - typedef typename wrap_key_tuple_elements<5,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - 
func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 5, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; - -#if __TBB_VARIADIC_MAX >= 6 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<6,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<6,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - public: - typedef typename wrap_key_tuple_elements<6,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, typename 
Body5> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5) - : base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 6, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - -#if __TBB_VARIADIC_MAX >= 7 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<7,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<7,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - typedef typename std::tuple_element<6, OutputTuple>::type T6; - public: - typedef typename wrap_key_tuple_elements<7,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef type_to_key_function_body<T6, K> *f6_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), - new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, - typename Body5, typename Body6> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, - Body5 body5, Body6 body6) : base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new 
type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5), - new type_to_key_function_body_leaf<T6, K, Body6>(body6) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 7, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - -#if __TBB_VARIADIC_MAX >= 8 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<8,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<8,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - typedef typename std::tuple_element<6, OutputTuple>::type T6; - typedef typename std::tuple_element<7, OutputTuple>::type T7; - public: - typedef typename wrap_key_tuple_elements<8,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef type_to_key_function_body<T6, K> *f6_p; - typedef type_to_key_function_body<T7, K> *f7_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), - new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), - new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, - typename Body5, typename Body6, typename Body7> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, - Body5 body5, Body6 body6, Body7 body7) : base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), 
- new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5), - new type_to_key_function_body_leaf<T6, K, Body6>(body6), - new type_to_key_function_body_leaf<T7, K, Body7>(body7) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 8, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - -#if __TBB_VARIADIC_MAX >= 9 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<9,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<9,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - typedef typename std::tuple_element<6, OutputTuple>::type T6; - typedef typename std::tuple_element<7, OutputTuple>::type T7; - typedef typename std::tuple_element<8, OutputTuple>::type T8; - public: - typedef typename wrap_key_tuple_elements<9,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef type_to_key_function_body<T6, K> *f6_p; - typedef type_to_key_function_body<T7, K> *f7_p; - typedef type_to_key_function_body<T8, K> *f8_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), - new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), - new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), - new type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, - typename Body5, typename Body6, typename Body7, typename Body8> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, - Body5 body5, Body6 body6, Body7 body7, Body8 body8) : 
base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5), - new type_to_key_function_body_leaf<T6, K, Body6>(body6), - new type_to_key_function_body_leaf<T7, K, Body7>(body7), - new type_to_key_function_body_leaf<T8, K, Body8>(body8) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 9, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - -#if __TBB_VARIADIC_MAX >= 10 - template<typename OutputTuple, typename K, typename KHash> - class unfolded_join_node<10,key_matching_port,OutputTuple,key_matching<K,KHash> > : public - join_base<10,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { - typedef typename std::tuple_element<0, OutputTuple>::type T0; - typedef typename std::tuple_element<1, OutputTuple>::type T1; - typedef typename std::tuple_element<2, OutputTuple>::type T2; - typedef typename std::tuple_element<3, OutputTuple>::type T3; - typedef typename std::tuple_element<4, OutputTuple>::type T4; - typedef typename std::tuple_element<5, OutputTuple>::type T5; - typedef typename std::tuple_element<6, OutputTuple>::type T6; - typedef typename std::tuple_element<7, OutputTuple>::type T7; - typedef typename std::tuple_element<8, OutputTuple>::type T8; - typedef typename std::tuple_element<9, OutputTuple>::type T9; - public: - typedef typename wrap_key_tuple_elements<10,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; - typedef OutputTuple output_type; - private: - typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; - typedef type_to_key_function_body<T0, K> *f0_p; - typedef type_to_key_function_body<T1, K> *f1_p; - typedef type_to_key_function_body<T2, K> *f2_p; - typedef type_to_key_function_body<T3, K> *f3_p; - typedef type_to_key_function_body<T4, K> *f4_p; - typedef type_to_key_function_body<T5, K> *f5_p; - typedef type_to_key_function_body<T6, K> *f6_p; - typedef type_to_key_function_body<T7, K> *f7_p; - typedef type_to_key_function_body<T8, K> *f8_p; - typedef type_to_key_function_body<T9, K> *f9_p; - typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p, f9_p > func_initializer_type; - public: -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - unfolded_join_node(graph &g) : base_type(g, - func_initializer_type( - new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), - new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), - new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), - new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), - new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), - new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), - new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), - new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), - new 
type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()), - new type_to_key_function_body_leaf<T9, K, key_from_message_body<K,T9> >(key_from_message_body<K,T9>()) - ) ) { - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, - typename Body5, typename Body6, typename Body7, typename Body8, typename Body9> - unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, - Body5 body5, Body6 body6, Body7 body7, Body8 body8, Body9 body9) : base_type(g, func_initializer_type( - new type_to_key_function_body_leaf<T0, K, Body0>(body0), - new type_to_key_function_body_leaf<T1, K, Body1>(body1), - new type_to_key_function_body_leaf<T2, K, Body2>(body2), - new type_to_key_function_body_leaf<T3, K, Body3>(body3), - new type_to_key_function_body_leaf<T4, K, Body4>(body4), - new type_to_key_function_body_leaf<T5, K, Body5>(body5), - new type_to_key_function_body_leaf<T6, K, Body6>(body6), - new type_to_key_function_body_leaf<T7, K, Body7>(body7), - new type_to_key_function_body_leaf<T8, K, Body8>(body8), - new type_to_key_function_body_leaf<T9, K, Body9>(body9) - ) ) { - static_assert(std::tuple_size<OutputTuple>::value == 10, "wrong number of body initializers"); - } - unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} - }; -#endif - - //! templated function to refer to input ports of the join node - template<size_t N, typename JNT> - typename std::tuple_element<N, typename JNT::input_ports_type>::type &input_port(JNT &jn) { - return std::get<N>(jn.input_ports()); - } - -#endif // __TBB__flow_graph_join_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_join_impl_H +#define __TBB__flow_graph_join_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included into namespace tbb::detail::d1 + + struct forwarding_base : no_assign { + forwarding_base(graph &g) : graph_ref(g) {} + virtual ~forwarding_base() {} + graph& graph_ref; + }; + + struct queueing_forwarding_base : forwarding_base { + using forwarding_base::forwarding_base; + // decrement_port_count may create a forwarding task. If we cannot handle the task + // ourselves, ask decrement_port_count to deal with it. + virtual graph_task* decrement_port_count(bool handle_task) = 0; + }; + + struct reserving_forwarding_base : forwarding_base { + using forwarding_base::forwarding_base; + // decrement_port_count may create a forwarding task. If we cannot handle the task + // ourselves, ask decrement_port_count to deal with it. + virtual graph_task* decrement_port_count() = 0; + virtual void increment_port_count() = 0; + }; + + // specialization that lets us keep a copy of the current_key for building results. + // KeyType can be a reference type. 
+ template<typename KeyType> + struct matching_forwarding_base : public forwarding_base { + typedef typename std::decay<KeyType>::type current_key_type; + matching_forwarding_base(graph &g) : forwarding_base(g) { } + virtual graph_task* increment_key_count(current_key_type const & /*t*/) = 0; + current_key_type current_key; // so ports can refer to FE's desired items + }; + + template< int N > + struct join_helper { + + template< typename TupleType, typename PortType > + static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { + std::get<N-1>( my_input ).set_join_node_pointer(port); + join_helper<N-1>::set_join_node_pointer( my_input, port ); + } + template< typename TupleType > + static inline void consume_reservations( TupleType &my_input ) { + std::get<N-1>( my_input ).consume(); + join_helper<N-1>::consume_reservations( my_input ); + } + + template< typename TupleType > + static inline void release_my_reservation( TupleType &my_input ) { + std::get<N-1>( my_input ).release(); + } + + template <typename TupleType> + static inline void release_reservations( TupleType &my_input) { + join_helper<N-1>::release_reservations(my_input); + release_my_reservation(my_input); + } + + template< typename InputTuple, typename OutputTuple > + static inline bool reserve( InputTuple &my_input, OutputTuple &out) { + if ( !std::get<N-1>( my_input ).reserve( std::get<N-1>( out ) ) ) return false; + if ( !join_helper<N-1>::reserve( my_input, out ) ) { + release_my_reservation( my_input ); + return false; + } + return true; + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { + bool res = std::get<N-1>(my_input).get_item(std::get<N-1>(out) ); // may fail + return join_helper<N-1>::get_my_item(my_input, out) && res; // do get on other inputs before returning + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_items(InputTuple &my_input, OutputTuple &out) { + return get_my_item(my_input, out); + } + + template<typename InputTuple> + static inline void reset_my_port(InputTuple &my_input) { + join_helper<N-1>::reset_my_port(my_input); + std::get<N-1>(my_input).reset_port(); + } + + template<typename InputTuple> + static inline void reset_ports(InputTuple& my_input) { + reset_my_port(my_input); + } + + template<typename InputTuple, typename KeyFuncTuple> + static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { + std::get<N-1>(my_input).set_my_key_func(std::get<N-1>(my_key_funcs)); + std::get<N-1>(my_key_funcs) = nullptr; + join_helper<N-1>::set_key_functors(my_input, my_key_funcs); + } + + template< typename KeyFuncTuple> + static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { + __TBB_ASSERT( + std::get<N-1>(other_inputs).get_my_key_func(), + "key matching join node should not be instantiated without functors." 
+ ); + std::get<N-1>(my_inputs).set_my_key_func(std::get<N-1>(other_inputs).get_my_key_func()->clone()); + join_helper<N-1>::copy_key_functors(my_inputs, other_inputs); + } + + template<typename InputTuple> + static inline void reset_inputs(InputTuple &my_input, reset_flags f) { + join_helper<N-1>::reset_inputs(my_input, f); + std::get<N-1>(my_input).reset_receiver(f); + } + }; // join_helper<N> + + template< > + struct join_helper<1> { + + template< typename TupleType, typename PortType > + static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { + std::get<0>( my_input ).set_join_node_pointer(port); + } + + template< typename TupleType > + static inline void consume_reservations( TupleType &my_input ) { + std::get<0>( my_input ).consume(); + } + + template< typename TupleType > + static inline void release_my_reservation( TupleType &my_input ) { + std::get<0>( my_input ).release(); + } + + template<typename TupleType> + static inline void release_reservations( TupleType &my_input) { + release_my_reservation(my_input); + } + + template< typename InputTuple, typename OutputTuple > + static inline bool reserve( InputTuple &my_input, OutputTuple &out) { + return std::get<0>( my_input ).reserve( std::get<0>( out ) ); + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { + return std::get<0>(my_input).get_item(std::get<0>(out)); + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_items(InputTuple &my_input, OutputTuple &out) { + return get_my_item(my_input, out); + } + + template<typename InputTuple> + static inline void reset_my_port(InputTuple &my_input) { + std::get<0>(my_input).reset_port(); + } + + template<typename InputTuple> + static inline void reset_ports(InputTuple& my_input) { + reset_my_port(my_input); + } + + template<typename InputTuple, typename KeyFuncTuple> + static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { + std::get<0>(my_input).set_my_key_func(std::get<0>(my_key_funcs)); + std::get<0>(my_key_funcs) = nullptr; + } + + template< typename KeyFuncTuple> + static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { + __TBB_ASSERT( + std::get<0>(other_inputs).get_my_key_func(), + "key matching join node should not be instantiated without functors." + ); + std::get<0>(my_inputs).set_my_key_func(std::get<0>(other_inputs).get_my_key_func()->clone()); + } + template<typename InputTuple> + static inline void reset_inputs(InputTuple &my_input, reset_flags f) { + std::get<0>(my_input).reset_receiver(f); + } + }; // join_helper<1> + + //! 
The two-phase join port + template< typename T > + class reserving_port : public receiver<T> { + public: + typedef T input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_pred, rem_pred, res_item, rel_res, con_res + }; + typedef reserving_port<T> class_type; + + class reserving_port_operation : public aggregated_operation<reserving_port_operation> { + public: + char type; + union { + T *my_arg; + predecessor_type *my_pred; + }; + reserving_port_operation(const T& e, op_type t) : + type(char(t)), my_arg(const_cast<T*>(&e)) {} + reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), + my_pred(const_cast<predecessor_type *>(&s)) {} + reserving_port_operation(op_type t) : type(char(t)) {} + }; + + typedef aggregating_functor<class_type, reserving_port_operation> handler_type; + friend class aggregating_functor<class_type, reserving_port_operation>; + aggregator<handler_type, reserving_port_operation> my_aggregator; + + void handle_operations(reserving_port_operation* op_list) { + reserving_port_operation *current; + bool was_missing_predecessors = false; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case reg_pred: + was_missing_predecessors = my_predecessors.empty(); + my_predecessors.add(*(current->my_pred)); + if ( was_missing_predecessors ) { + (void) my_join->decrement_port_count(); // may try to forward + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case rem_pred: + if ( !my_predecessors.empty() ) { + my_predecessors.remove(*(current->my_pred)); + if ( my_predecessors.empty() ) // was the last predecessor + my_join->increment_port_count(); + } + // TODO: consider returning failure if there were no predecessors to remove + current->status.store( SUCCEEDED, std::memory_order_release ); + break; + case res_item: + if ( reserved ) { + current->status.store( FAILED, std::memory_order_release); + } + else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) { + reserved = true; + current->status.store( SUCCEEDED, std::memory_order_release); + } else { + if ( my_predecessors.empty() ) { + my_join->increment_port_count(); + } + current->status.store( FAILED, std::memory_order_release); + } + break; + case rel_res: + reserved = false; + my_predecessors.try_release( ); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case con_res: + reserved = false; + my_predecessors.try_consume( ); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task( const T & ) override { + return nullptr; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + //! Constructor + reserving_port() : my_join(nullptr), my_predecessors(this), reserved(false) { + my_aggregator.initialize_handler(handler_type(this)); + } + + // copy constructor + reserving_port(const reserving_port& /* other */) = delete; + + void set_join_node_pointer(reserving_forwarding_base *join) { + my_join = join; + } + + //! 
Add a predecessor + bool register_predecessor( predecessor_type &src ) override { + reserving_port_operation op_data(src, reg_pred); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Remove a predecessor + bool remove_predecessor( predecessor_type &src ) override { + reserving_port_operation op_data(src, rem_pred); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Reserve an item from the port + bool reserve( T &v ) { + reserving_port_operation op_data(v, res_item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Release the port + void release( ) { + reserving_port_operation op_data(rel_res); + my_aggregator.execute(&op_data); + } + + //! Complete use of the port + void consume( ) { + reserving_port_operation op_data(con_res); + my_aggregator.execute(&op_data); + } + + void reset_receiver( reset_flags f) { + if(f & rf_clear_edges) my_predecessors.clear(); + else + my_predecessors.reset(); + reserved = false; + __TBB_ASSERT(!(f&rf_clear_edges) || my_predecessors.empty(), "port edges not removed"); + } + + private: +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + + reserving_forwarding_base *my_join; + reservable_predecessor_cache< T, null_mutex > my_predecessors; + bool reserved; + }; // reserving_port + + //! queueing join_port + template<typename T> + class queueing_port : public receiver<T>, public item_buffer<T> { + public: + typedef T input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef queueing_port<T> class_type; + + // ----------- Aggregator ------------ + private: + enum op_type { get__item, res_port, try__put_task + }; + + class queueing_port_operation : public aggregated_operation<queueing_port_operation> { + public: + char type; + T my_val; + T* my_arg; + graph_task* bypass_t; + // constructor for value parameter + queueing_port_operation(const T& e, op_type t) : + type(char(t)), my_val(e) + , bypass_t(nullptr) + {} + // constructor for pointer parameter + queueing_port_operation(const T* p, op_type t) : + type(char(t)), my_arg(const_cast<T*>(p)) + , bypass_t(nullptr) + {} + // constructor with no parameter + queueing_port_operation(op_type t) : type(char(t)) + , bypass_t(nullptr) + {} + }; + + typedef aggregating_functor<class_type, queueing_port_operation> handler_type; + friend class aggregating_functor<class_type, queueing_port_operation>; + aggregator<handler_type, queueing_port_operation> my_aggregator; + + void handle_operations(queueing_port_operation* op_list) { + queueing_port_operation *current; + bool was_empty; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case try__put_task: { + graph_task* rtask = nullptr; + was_empty = this->buffer_empty(); + this->push_back(current->my_val); + if (was_empty) rtask = my_join->decrement_port_count(false); + else + rtask = SUCCESSFULLY_ENQUEUED; + current->bypass_t = rtask; + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case get__item: + if(!this->buffer_empty()) { + *(current->my_arg) = this->front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + else { + current->status.store( FAILED, std::memory_order_release); + } + break; + case res_port: + __TBB_ASSERT(this->my_item_valid(this->my_head), "No item to reset"); + this->destroy_front(); + if(this->my_item_valid(this->my_head)) { + (void)my_join->decrement_port_count(true); + } + 
current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } + // ------------ End Aggregator --------------- + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const T &v) override { + queueing_port_operation op_data(v, try__put_task); + my_aggregator.execute(&op_data); + __TBB_ASSERT(op_data.status == SUCCEEDED || !op_data.bypass_t, "inconsistent return from aggregator"); + if(!op_data.bypass_t) return SUCCESSFULLY_ENQUEUED; + return op_data.bypass_t; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + //! Constructor + queueing_port() : item_buffer<T>() { + my_join = nullptr; + my_aggregator.initialize_handler(handler_type(this)); + } + + //! copy constructor + queueing_port(const queueing_port& /* other */) = delete; + + //! record parent for tallying available items + void set_join_node_pointer(queueing_forwarding_base *join) { + my_join = join; + } + + bool get_item( T &v ) { + queueing_port_operation op_data(&v, get__item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // reset_port is called when item is accepted by successor, but + // is initiated by join_node. + void reset_port() { + queueing_port_operation op_data(res_port); + my_aggregator.execute(&op_data); + return; + } + + void reset_receiver(reset_flags) { + item_buffer<T>::reset(); + } + + private: +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + + queueing_forwarding_base *my_join; + }; // queueing_port + +#include "_flow_graph_tagged_buffer_impl.h" + + template<typename K> + struct count_element { + K my_key; + size_t my_value; + }; + + // method to access the key in the counting table + // the ref has already been removed from K + template< typename K > + struct key_to_count_functor { + typedef count_element<K> table_item_type; + const K& operator()(const table_item_type& v) { return v.my_key; } + }; + + // the ports can have only one template parameter. 
We wrap the types needed in + // a traits type + template< class TraitsType > + class key_matching_port : + public receiver<typename TraitsType::T>, + public hash_buffer< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK, + typename TraitsType::KHash > { + public: + typedef TraitsType traits; + typedef key_matching_port<traits> class_type; + typedef typename TraitsType::T input_type; + typedef typename TraitsType::K key_type; + typedef typename std::decay<key_type>::type noref_key_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename TraitsType::TtoK type_to_key_func_type; + typedef typename TraitsType::KHash hash_compare_type; + typedef hash_buffer< key_type, input_type, type_to_key_func_type, hash_compare_type > buffer_type; + + private: +// ----------- Aggregator ------------ + private: + enum op_type { try__put, get__item, res_port + }; + + class key_matching_port_operation : public aggregated_operation<key_matching_port_operation> { + public: + char type; + input_type my_val; + input_type *my_arg; + // constructor for value parameter + key_matching_port_operation(const input_type& e, op_type t) : + type(char(t)), my_val(e) {} + // constructor for pointer parameter + key_matching_port_operation(const input_type* p, op_type t) : + type(char(t)), my_arg(const_cast<input_type*>(p)) {} + // constructor with no parameter + key_matching_port_operation(op_type t) : type(char(t)) {} + }; + + typedef aggregating_functor<class_type, key_matching_port_operation> handler_type; + friend class aggregating_functor<class_type, key_matching_port_operation>; + aggregator<handler_type, key_matching_port_operation> my_aggregator; + + void handle_operations(key_matching_port_operation* op_list) { + key_matching_port_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case try__put: { + bool was_inserted = this->insert_with_key(current->my_val); + // return failure if a duplicate insertion occurs + current->status.store( was_inserted ? 
SUCCEEDED : FAILED, std::memory_order_release); + } + break; + case get__item: + // use current_key from FE for item + if(!this->find_with_key(my_join->current_key, *(current->my_arg))) { + __TBB_ASSERT(false, "Failed to find item corresponding to current_key."); + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case res_port: + // use current_key from FE for item + this->delete_with_key(my_join->current_key); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } +// ------------ End Aggregator --------------- + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const input_type& v) override { + key_matching_port_operation op_data(v, try__put); + graph_task* rtask = nullptr; + my_aggregator.execute(&op_data); + if(op_data.status == SUCCEEDED) { + rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn + // rtask has to reflect the return status of the try_put + if(!rtask) rtask = SUCCESSFULLY_ENQUEUED; + } + return rtask; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + key_matching_port() : receiver<input_type>(), buffer_type() { + my_join = nullptr; + my_aggregator.initialize_handler(handler_type(this)); + } + + // copy constructor + key_matching_port(const key_matching_port& /*other*/) = delete; +#if __INTEL_COMPILER <= 2021 + // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited + // class while the parent class has the virtual keyword for the destrocutor. + virtual +#endif + ~key_matching_port() { } + + void set_join_node_pointer(forwarding_base *join) { + my_join = dynamic_cast<matching_forwarding_base<key_type>*>(join); + } + + void set_my_key_func(type_to_key_func_type *f) { this->set_key_func(f); } + + type_to_key_func_type* get_my_key_func() { return this->get_key_func(); } + + bool get_item( input_type &v ) { + // aggregator uses current_key from FE for Key + key_matching_port_operation op_data(&v, get__item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // reset_port is called when item is accepted by successor, but + // is initiated by join_node. + void reset_port() { + key_matching_port_operation op_data(res_port); + my_aggregator.execute(&op_data); + return; + } + + void reset_receiver(reset_flags ) { + buffer_type::reset(); + } + + private: + // my_join forwarding base used to count number of inputs that + // received key. + matching_forwarding_base<key_type> *my_join; + }; // key_matching_port + + using namespace graph_policy_namespace; + + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_base; + + //! 
join_node_FE : implements input port policy + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_FE; + + template<typename InputTuple, typename OutputTuple> + class join_node_FE<reserving, InputTuple, OutputTuple> : public reserving_forwarding_base { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef join_node_base<reserving, InputTuple, OutputTuple> base_node_type; // for forwarding + + join_node_FE(graph &g) : reserving_forwarding_base(g), my_node(nullptr) { + ports_with_no_inputs = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + join_node_FE(const join_node_FE& other) : reserving_forwarding_base((other.reserving_forwarding_base::graph_ref)), my_node(nullptr) { + ports_with_no_inputs = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void increment_port_count() override { + ++ports_with_no_inputs; + } + + // if all input_ports have predecessors, spawn forward to try and consume tuples + graph_task* decrement_port_count() override { + if(ports_with_no_inputs.fetch_sub(1) == 1) { + if(is_graph_active(this->graph_ref)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); + graph_ref.reserve_wait(); + spawn_in_graph_arena(this->graph_ref, *t); + } + } + return nullptr; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f) { + // called outside of parallel contexts + ports_with_no_inputs = N; + join_helper<N>::reset_inputs(my_inputs, f); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { + return !ports_with_no_inputs; + } + + bool try_to_make_tuple(output_type &out) { + if(ports_with_no_inputs) return false; + return join_helper<N>::reserve(my_inputs, out); + } + + void tuple_accepted() { + join_helper<N>::consume_reservations(my_inputs); + } + void tuple_rejected() { + join_helper<N>::release_reservations(my_inputs); + } + + input_type my_inputs; + base_node_type *my_node; + std::atomic<std::size_t> ports_with_no_inputs; + }; // join_node_FE<reserving, ... 
> + + template<typename InputTuple, typename OutputTuple> + class join_node_FE<queueing, InputTuple, OutputTuple> : public queueing_forwarding_base { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef join_node_base<queueing, InputTuple, OutputTuple> base_node_type; // for forwarding + + join_node_FE(graph &g) : queueing_forwarding_base(g), my_node(nullptr) { + ports_with_no_items = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + join_node_FE(const join_node_FE& other) : queueing_forwarding_base((other.queueing_forwarding_base::graph_ref)), my_node(nullptr) { + ports_with_no_items = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + // needed for forwarding + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void reset_port_count() { + ports_with_no_items = N; + } + + // if all input_ports have items, spawn forward to try and consume tuples + graph_task* decrement_port_count(bool handle_task) override + { + if(ports_with_no_items.fetch_sub(1) == 1) { + if(is_graph_active(this->graph_ref)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); + graph_ref.reserve_wait(); + if( !handle_task ) + return t; + spawn_in_graph_arena(this->graph_ref, *t); + } + } + return nullptr; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f) { + reset_port_count(); + join_helper<N>::reset_inputs(my_inputs, f ); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { + return !ports_with_no_items; + } + + bool try_to_make_tuple(output_type &out) { + if(ports_with_no_items) return false; + return join_helper<N>::get_items(my_inputs, out); + } + + void tuple_accepted() { + reset_port_count(); + join_helper<N>::reset_ports(my_inputs); + } + void tuple_rejected() { + // nothing to do. + } + + input_type my_inputs; + base_node_type *my_node; + std::atomic<std::size_t> ports_with_no_items; + }; // join_node_FE<queueing, ...> + + // key_matching join front-end. + template<typename InputTuple, typename OutputTuple, typename K, typename KHash> + class join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> : public matching_forwarding_base<K>, + // buffer of key value counts + public hash_buffer< // typedefed below to key_to_count_buffer_type + typename std::decay<K>::type&, // force ref type on K + count_element<typename std::decay<K>::type>, + type_to_key_function_body< + count_element<typename std::decay<K>::type>, + typename std::decay<K>::type& >, + KHash >, + // buffer of output items + public item_buffer<OutputTuple> { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef K key_type; + typedef typename std::decay<key_type>::type unref_key_type; + typedef KHash key_hash_compare; + // must use K without ref. + typedef count_element<unref_key_type> count_element_type; + // method that lets us refer to the key of this type. 
+ typedef key_to_count_functor<unref_key_type> key_to_count_func; + typedef type_to_key_function_body< count_element_type, unref_key_type&> TtoK_function_body_type; + typedef type_to_key_function_body_leaf<count_element_type, unref_key_type&, key_to_count_func> TtoK_function_body_leaf_type; + // this is the type of the special table that keeps track of the number of discrete + // elements corresponding to each key that we've seen. + typedef hash_buffer< unref_key_type&, count_element_type, TtoK_function_body_type, key_hash_compare > + key_to_count_buffer_type; + typedef item_buffer<output_type> output_buffer_type; + typedef join_node_base<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> base_node_type; // for forwarding + typedef matching_forwarding_base<key_type> forwarding_base_type; + +// ----------- Aggregator ------------ + // the aggregator is only needed to serialize the access to the hash table. + // and the output_buffer_type base class + private: + enum op_type { res_count, inc_count, may_succeed, try_make }; + typedef join_node_FE<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> class_type; + + class key_matching_FE_operation : public aggregated_operation<key_matching_FE_operation> { + public: + char type; + unref_key_type my_val; + output_type* my_output; + graph_task* bypass_t; + // constructor for value parameter + key_matching_FE_operation(const unref_key_type& e , op_type t) : type(char(t)), my_val(e), + my_output(nullptr), bypass_t(nullptr) {} + key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(nullptr) {} + // constructor with no parameter + key_matching_FE_operation(op_type t) : type(char(t)), my_output(nullptr), bypass_t(nullptr) {} + }; + + typedef aggregating_functor<class_type, key_matching_FE_operation> handler_type; + friend class aggregating_functor<class_type, key_matching_FE_operation>; + aggregator<handler_type, key_matching_FE_operation> my_aggregator; + + // called from aggregator, so serialized + // returns a task pointer if the a task would have been enqueued but we asked that + // it be returned. Otherwise returns nullptr. 
+ graph_task* fill_output_buffer(unref_key_type &t) { + output_type l_out; + graph_task* rtask = nullptr; + bool do_fwd = this->buffer_empty() && is_graph_active(this->graph_ref); + this->current_key = t; + this->delete_with_key(this->current_key); // remove the key + if(join_helper<N>::get_items(my_inputs, l_out)) { // <== call back + this->push_back(l_out); + if(do_fwd) { // we enqueue if receiving an item from predecessor, not if successor asks for item + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + rtask = allocator.new_object<task_type>(this->graph_ref, allocator, *my_node); + this->graph_ref.reserve_wait(); + do_fwd = false; + } + // retire the input values + join_helper<N>::reset_ports(my_inputs); // <== call back + } + else { + __TBB_ASSERT(false, "should have had something to push"); + } + return rtask; + } + + void handle_operations(key_matching_FE_operation* op_list) { + key_matching_FE_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case res_count: // called from BE + { + this->destroy_front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case inc_count: { // called from input ports + count_element_type *p = 0; + unref_key_type &t = current->my_val; + if(!(this->find_ref_with_key(t,p))) { + count_element_type ev; + ev.my_key = t; + ev.my_value = 0; + this->insert_with_key(ev); + bool found = this->find_ref_with_key(t, p); + __TBB_ASSERT_EX(found, "should find key after inserting it"); + } + if(++(p->my_value) == size_t(N)) { + current->bypass_t = fill_output_buffer(t); + } + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case may_succeed: // called from BE + current->status.store( this->buffer_empty() ? FAILED : SUCCEEDED, std::memory_order_release); + break; + case try_make: // called from BE + if(this->buffer_empty()) { + current->status.store( FAILED, std::memory_order_release); + } + else { + *(current->my_output) = this->front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + } + } + } +// ------------ End Aggregator --------------- + + public: + template<typename FunctionTuple> + join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(nullptr) { + join_helper<N>::set_join_node_pointer(my_inputs, this); + join_helper<N>::set_key_functors(my_inputs, TtoK_funcs); + my_aggregator.initialize_handler(handler_type(this)); + TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); + this->set_key_func(cfb); + } + + join_node_FE(const join_node_FE& other) : forwarding_base_type((other.forwarding_base_type::graph_ref)), key_to_count_buffer_type(), + output_buffer_type() { + my_node = nullptr; + join_helper<N>::set_join_node_pointer(my_inputs, this); + join_helper<N>::copy_key_functors(my_inputs, const_cast<input_type &>(other.my_inputs)); + my_aggregator.initialize_handler(handler_type(this)); + TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); + this->set_key_func(cfb); + } + + // needed for forwarding + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void reset_port_count() { // called from BE + key_matching_FE_operation op_data(res_count); + my_aggregator.execute(&op_data); + return; + } + + // if all input_ports have items, spawn forward to try and consume tuples + // return a task if we are asked and did create one. 
+ graph_task *increment_key_count(unref_key_type const & t) override { // called from input_ports + key_matching_FE_operation op_data(t, inc_count); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f ) { + // called outside of parallel contexts + join_helper<N>::reset_inputs(my_inputs, f); + + key_to_count_buffer_type::reset(); + output_buffer_type::reset(); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { // called from back-end + key_matching_FE_operation op_data(may_succeed); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // cannot lock while calling back to input_ports. current_key will only be set + // and reset under the aggregator, so it will remain consistent. + bool try_to_make_tuple(output_type &out) { + key_matching_FE_operation op_data(&out,try_make); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + void tuple_accepted() { + reset_port_count(); // reset current_key after ports reset. + } + + void tuple_rejected() { + // nothing to do. + } + + input_type my_inputs; // input ports + base_node_type *my_node; + }; // join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> + + //! join_node_base + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_base : public graph_node, public join_node_FE<JP, InputTuple, OutputTuple>, + public sender<OutputTuple> { + protected: + using graph_node::my_graph; + public: + typedef OutputTuple output_type; + + typedef typename sender<output_type>::successor_type successor_type; + typedef join_node_FE<JP, InputTuple, OutputTuple> input_ports_type; + using input_ports_type::tuple_build_may_succeed; + using input_ports_type::try_to_make_tuple; + using input_ports_type::tuple_accepted; + using input_ports_type::tuple_rejected; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_succ, rem_succ, try__get, do_fwrd, do_fwrd_bypass + }; + typedef join_node_base<JP,InputTuple,OutputTuple> class_type; + + class join_node_base_operation : public aggregated_operation<join_node_base_operation> { + public: + char type; + union { + output_type *my_arg; + successor_type *my_succ; + }; + graph_task* bypass_t; + join_node_base_operation(const output_type& e, op_type t) : type(char(t)), + my_arg(const_cast<output_type*>(&e)), bypass_t(nullptr) {} + join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), + my_succ(const_cast<successor_type *>(&s)), bypass_t(nullptr) {} + join_node_base_operation(op_type t) : type(char(t)), bypass_t(nullptr) {} + }; + + typedef aggregating_functor<class_type, join_node_base_operation> handler_type; + friend class aggregating_functor<class_type, join_node_base_operation>; + bool forwarder_busy; + aggregator<handler_type, join_node_base_operation> my_aggregator; + + void handle_operations(join_node_base_operation* op_list) { + join_node_base_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case reg_succ: { + my_successors.register_successor(*(current->my_succ)); + if(tuple_build_may_succeed() && !forwarder_busy && is_graph_active(my_graph)) { + small_object_allocator allocator{}; + typedef forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> > task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, 
*this); + my_graph.reserve_wait(); + spawn_in_graph_arena(my_graph, *t); + forwarder_busy = true; + } + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case rem_succ: + my_successors.remove_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case try__get: + if(tuple_build_may_succeed()) { + if(try_to_make_tuple(*(current->my_arg))) { + tuple_accepted(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + else current->status.store( FAILED, std::memory_order_release); + } + else current->status.store( FAILED, std::memory_order_release); + break; + case do_fwrd_bypass: { + bool build_succeeded; + graph_task *last_task = nullptr; + output_type out; + // forwarding must be exclusive, because try_to_make_tuple and tuple_accepted + // are separate locked methods in the FE. We could conceivably fetch the front + // of the FE queue, then be swapped out, have someone else consume the FE's + // object, then come back, forward, and then try to remove it from the queue + // again. Without reservation of the FE, the methods accessing it must be locked. + // We could remember the keys of the objects we forwarded, and then remove + // them from the input ports after forwarding is complete? + if(tuple_build_may_succeed()) { // checks output queue of FE + do { + build_succeeded = try_to_make_tuple(out); // fetch front_end of queue + if(build_succeeded) { + graph_task *new_task = my_successors.try_put_task(out); + last_task = combine_tasks(my_graph, last_task, new_task); + if(new_task) { + tuple_accepted(); + } + else { + tuple_rejected(); + build_succeeded = false; + } + } + } while(build_succeeded); + } + current->bypass_t = last_task; + current->status.store( SUCCEEDED, std::memory_order_release); + forwarder_busy = false; + } + break; + } + } + } + // ---------- end aggregator ----------- + public: + join_node_base(graph &g) + : graph_node(g), input_ports_type(g), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + join_node_base(const join_node_base& other) : + graph_node(other.graph_node::my_graph), input_ports_type(other), + sender<OutputTuple>(), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + template<typename FunctionTuple> + join_node_base(graph &g, FunctionTuple f) + : graph_node(g), input_ports_type(g, f), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + bool register_successor(successor_type &r) override { + join_node_base_operation op_data(r, reg_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool remove_successor( successor_type &r) override { + join_node_base_operation op_data(r, rem_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool try_get( output_type &v) override { + join_node_base_operation op_data(v, try__get); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + protected: + void reset_node(reset_flags f) override { + input_ports_type::reset(f); + if(f & rf_clear_edges) my_successors.clear(); + } + + private: + broadcast_cache<output_type, null_rw_mutex> my_successors; + + friend class forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> >; + graph_task 
*forward_task() { + join_node_base_operation op_data(do_fwrd_bypass); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + }; // join_node_base + + // join base class type generator + template<int N, template<class> class PT, typename OutputTuple, typename JP> + struct join_base { + typedef join_node_base<JP, typename wrap_tuple_elements<N,PT,OutputTuple>::type, OutputTuple> type; + }; + + template<int N, typename OutputTuple, typename K, typename KHash> + struct join_base<N, key_matching_port, OutputTuple, key_matching<K,KHash> > { + typedef key_matching<K, KHash> key_traits_type; + typedef K key_type; + typedef KHash key_hash_compare; + typedef join_node_base< key_traits_type, + // ports type + typename wrap_key_tuple_elements<N,key_matching_port,key_traits_type,OutputTuple>::type, + OutputTuple > type; + }; + + //! unfolded_join_node : passes input_ports_type to join_node_base. We build the input port type + // using tuple_element. The class PT is the port type (reserving_port, queueing_port, key_matching_port) + // and should match the typename. + + template<int N, template<class> class PT, typename OutputTuple, typename JP> + class unfolded_join_node : public join_base<N,PT,OutputTuple,JP>::type { + public: + typedef typename wrap_tuple_elements<N, PT, OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<JP, input_ports_type, output_type > base_type; + public: + unfolded_join_node(graph &g) : base_type(g) {} + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + template <typename K, typename T> + struct key_from_message_body { + K operator()(const T& t) const { + return key_from_message<K>(t); + } + }; + // Adds const to reference type + template <typename K, typename T> + struct key_from_message_body<K&,T> { + const K& operator()(const T& t) const { + return key_from_message<const K&>(t); + } + }; +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + // key_matching unfolded_join_node. This must be a separate specialization because the constructors + // differ. 
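For context, the key_matching constructors being specialized below are reached from user code through the public join_node API; the following is a minimal usage sketch, not part of this diff, in which the Order and Payment message types and their id key field are illustrative assumptions.

    #include <oneapi/tbb/flow_graph.h>
    #include <iostream>
    #include <tuple>

    struct Order   { int id; double amount; };
    struct Payment { int id; double paid; };

    int main() {
        using namespace oneapi::tbb::flow;
        graph g;

        // One key-extraction body per input port; each body ends up wrapped
        // in a type_to_key_function_body_leaf by the unfolded node below.
        join_node<std::tuple<Order, Payment>, key_matching<int>> join(
            g,
            [](const Order& o)   { return o.id; },
            [](const Payment& p) { return p.id; });

        function_node<std::tuple<Order, Payment>, continue_msg> sink(
            g, unlimited,
            [](const std::tuple<Order, Payment>& t) {
                std::cout << "matched id " << std::get<0>(t).id << "\n";
                return continue_msg{};
            });

        make_edge(join, sink);

        // input_port<N>() is the free function defined at the end of this header.
        input_port<0>(join).try_put(Order{7, 99.0});
        input_port<1>(join).try_put(Payment{7, 99.0});
        g.wait_for_all();   // the tuple for key 7 is forwarded once both ports hold it
        return 0;
    }

The number of lambdas passed to the constructor must equal the tuple size, which is exactly why a separate specialization per arity is generated below.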
+ + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<2,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<2,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + public: + typedef typename wrap_key_tuple_elements<2,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef std::tuple< f0_p, f1_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1> + unfolded_join_node(graph &g, Body0 body0, Body1 body1) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 2, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<3,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<3,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + public: + typedef typename wrap_key_tuple_elements<3,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef std::tuple< f0_p, f1_p, f2_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 3, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node 
&other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<4,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<4,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + public: + typedef typename wrap_key_tuple_elements<4,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 4, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<5,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<5,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + public: + typedef typename wrap_key_tuple_elements<5,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + 
func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 5, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + +#if __TBB_VARIADIC_MAX >= 6 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<6,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<6,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + public: + typedef typename wrap_key_tuple_elements<6,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, typename 
Body5> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5) + : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 6, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 7 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<7,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<7,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + public: + typedef typename wrap_key_tuple_elements<7,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new 
type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 7, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 8 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<8,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<8,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + public: + typedef typename wrap_key_tuple_elements<8,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), 
+ new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 8, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 9 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<9,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<9,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + typedef typename std::tuple_element<8, OutputTuple>::type T8; + public: + typedef typename wrap_key_tuple_elements<9,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef type_to_key_function_body<T8, K> *f8_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), + new type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7, typename Body8> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7, Body8 body8) : 
base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7), + new type_to_key_function_body_leaf<T8, K, Body8>(body8) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 9, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 10 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<10,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<10,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + typedef typename std::tuple_element<8, OutputTuple>::type T8; + typedef typename std::tuple_element<9, OutputTuple>::type T9; + public: + typedef typename wrap_key_tuple_elements<10,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef type_to_key_function_body<T8, K> *f8_p; + typedef type_to_key_function_body<T9, K> *f9_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p, f9_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), + new 
type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()), + new type_to_key_function_body_leaf<T9, K, key_from_message_body<K,T9> >(key_from_message_body<K,T9>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7, typename Body8, typename Body9> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7, Body8 body8, Body9 body9) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7), + new type_to_key_function_body_leaf<T8, K, Body8>(body8), + new type_to_key_function_body_leaf<T9, K, Body9>(body9) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 10, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + + //! templated function to refer to input ports of the join node + template<size_t N, typename JNT> + typename std::tuple_element<N, typename JNT::input_ports_type>::type &input_port(JNT &jn) { + return std::get<N>(jn.input_ports()); + } + +#endif // __TBB__flow_graph_join_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h index aca465d088..bf93efae5c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h @@ -1,769 +1,769 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_node_impl_H -#define __TBB__flow_graph_node_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -#include "_flow_graph_item_buffer_impl.h" - -template< typename T, typename A > -class function_input_queue : public item_buffer<T,A> { -public: - bool empty() const { - return this->buffer_empty(); - } - - const T& front() const { - return this->item_buffer<T, A>::front(); - } - - void pop() { - this->destroy_front(); - } - - bool push( T& t ) { - return this->push_back( t ); - } -}; - -//! Input and scheduling for a function node that takes a type Input as input -// The only up-ref is apply_body_impl, which should implement the function -// call and any handling of the result. 
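[Editorial aside, not part of the diff above.] The unfolded_join_node specializations restored here are the implementation behind the public key_matching join_node: one user-supplied key function per tuple element (the Body0..BodyN constructor), with the body-less constructor guarded by __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING deriving keys from the messages themselves. A minimal usage sketch, assuming hypothetical Order/Shipment types with an int id field, might look like this:

// sketch only -- Order/Shipment and their id fields are hypothetical
#include <cstdio>
#include <tuple>
#include "oneapi/tbb/flow_graph.h"

struct Order    { int id; };
struct Shipment { int id; };

int main() {
    using namespace tbb::flow;
    graph g;

    // One key-extraction lambda per tuple element, mirroring the
    // Body0..BodyN constructor of unfolded_join_node above.
    join_node< std::tuple<Order, Shipment>, key_matching<int> > joiner(
        g,
        [](const Order& o)    { return o.id; },
        [](const Shipment& s) { return s.id; });

    function_node< std::tuple<Order, Shipment> > sink(
        g, unlimited,
        [](const std::tuple<Order, Shipment>& t) -> continue_msg {
            std::printf("matched key %d\n", std::get<0>(t).id);
            return continue_msg();
        });

    make_edge(joiner, sink);

    // input_port<N>() (defined at the end of the join implementation above)
    // exposes each typed input of the join node.
    input_port<0>(joiner).try_put(Order{42});
    input_port<1>(joiner).try_put(Shipment{42});

    g.wait_for_all();
    return 0;
}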
-template< typename Input, typename Policy, typename A, typename ImplType > -class function_input_base : public receiver<Input>, no_assign { - enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency - }; - typedef function_input_base<Input, Policy, A, ImplType> class_type; - -public: - - //! The input type of this receiver - typedef Input input_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type; - typedef function_input_queue<input_type, A> input_queue_type; - typedef typename allocator_traits<A>::template rebind_alloc<input_queue_type> allocator_type; - static_assert(!has_policy<queueing, Policy>::value || !has_policy<rejecting, Policy>::value, ""); - - //! Constructor for function_input_base - function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority ) - : my_graph_ref(g), my_max_concurrency(max_concurrency) - , my_concurrency(0), my_priority(a_priority) - , my_queue(!has_policy<rejecting, Policy>::value ? new input_queue_type() : NULL) - , my_predecessors(this) - , forwarder_busy(false) - { - my_aggregator.initialize_handler(handler_type(this)); - } - - //! Copy constructor - function_input_base( const function_input_base& src ) - : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority) {} - - //! Destructor - // The queue is allocated by the constructor for {multi}function_node. - // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead. - // This would be an interface-breaking change. - virtual ~function_input_base() { - if ( my_queue ) delete my_queue; - } - - graph_task* try_put_task( const input_type& t) override { - return try_put_task_impl(t, has_policy<lightweight, Policy>()); - } - - //! Adds src to the list of cached predecessors. - bool register_predecessor( predecessor_type &src ) override { - operation_type op_data(reg_pred); - op_data.r = &src; - my_aggregator.execute(&op_data); - return true; - } - - //! Removes src from the list of cached predecessors. 
- bool remove_predecessor( predecessor_type &src ) override { - operation_type op_data(rem_pred); - op_data.r = &src; - my_aggregator.execute(&op_data); - return true; - } - -protected: - - void reset_function_input_base( reset_flags f) { - my_concurrency = 0; - if(my_queue) { - my_queue->reset(); - } - reset_receiver(f); - forwarder_busy = false; - } - - graph& my_graph_ref; - const size_t my_max_concurrency; - size_t my_concurrency; - node_priority_t my_priority; - input_queue_type *my_queue; - predecessor_cache<input_type, null_mutex > my_predecessors; - - void reset_receiver( reset_flags f) { - if( f & rf_clear_edges) my_predecessors.clear(); - else - my_predecessors.reset(); - __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed"); - } - - graph& graph_reference() const override { - return my_graph_ref; - } - - graph_task* try_get_postponed_task(const input_type& i) { - operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item - my_aggregator.execute(&op_data); - return op_data.bypass_t; - } - -private: - - friend class apply_body_task_bypass< class_type, input_type >; - friend class forward_task_bypass< class_type >; - - class operation_type : public aggregated_operation< operation_type > { - public: - char type; - union { - input_type *elem; - predecessor_type *r; - }; - graph_task* bypass_t; - operation_type(const input_type& e, op_type t) : - type(char(t)), elem(const_cast<input_type*>(&e)) {} - operation_type(op_type t) : type(char(t)), r(NULL) {} - }; - - bool forwarder_busy; - typedef aggregating_functor<class_type, operation_type> handler_type; - friend class aggregating_functor<class_type, operation_type>; - aggregator< handler_type, operation_type > my_aggregator; - - graph_task* perform_queued_requests() { - graph_task* new_task = NULL; - if(my_queue) { - if(!my_queue->empty()) { - ++my_concurrency; - new_task = create_body_task(my_queue->front()); - - my_queue->pop(); - } - } - else { - input_type i; - if(my_predecessors.get_item(i)) { - ++my_concurrency; - new_task = create_body_task(i); - } - } - return new_task; - } - void handle_operations(operation_type *op_list) { - operation_type* tmp; - while (op_list) { - tmp = op_list; - op_list = op_list->next; - switch (tmp->type) { - case reg_pred: - my_predecessors.add(*(tmp->r)); - tmp->status.store(SUCCEEDED, std::memory_order_release); - if (!forwarder_busy) { - forwarder_busy = true; - spawn_forward_task(); - } - break; - case rem_pred: - my_predecessors.remove(*(tmp->r)); - tmp->status.store(SUCCEEDED, std::memory_order_release); - break; - case app_body_bypass: { - tmp->bypass_t = NULL; - __TBB_ASSERT(my_max_concurrency != 0, NULL); - --my_concurrency; - if(my_concurrency<my_max_concurrency) - tmp->bypass_t = perform_queued_requests(); - tmp->status.store(SUCCEEDED, std::memory_order_release); - } - break; - case tryput_bypass: internal_try_put_task(tmp); break; - case try_fwd: internal_forward(tmp); break; - case occupy_concurrency: - if (my_concurrency < my_max_concurrency) { - ++my_concurrency; - tmp->status.store(SUCCEEDED, std::memory_order_release); - } else { - tmp->status.store(FAILED, std::memory_order_release); - } - break; - } - } - } - - //! 
Put to the node, but return the task instead of enqueueing it - void internal_try_put_task(operation_type *op) { - __TBB_ASSERT(my_max_concurrency != 0, NULL); - if (my_concurrency < my_max_concurrency) { - ++my_concurrency; - graph_task * new_task = create_body_task(*(op->elem)); - op->bypass_t = new_task; - op->status.store(SUCCEEDED, std::memory_order_release); - } else if ( my_queue && my_queue->push(*(op->elem)) ) { - op->bypass_t = SUCCESSFULLY_ENQUEUED; - op->status.store(SUCCEEDED, std::memory_order_release); - } else { - op->bypass_t = NULL; - op->status.store(FAILED, std::memory_order_release); - } - } - - //! Creates tasks for postponed messages if available and if concurrency allows - void internal_forward(operation_type *op) { - op->bypass_t = NULL; - if (my_concurrency < my_max_concurrency) - op->bypass_t = perform_queued_requests(); - if(op->bypass_t) - op->status.store(SUCCEEDED, std::memory_order_release); - else { - forwarder_busy = false; - op->status.store(FAILED, std::memory_order_release); - } - } - - graph_task* internal_try_put_bypass( const input_type& t ) { - operation_type op_data(t, tryput_bypass); - my_aggregator.execute(&op_data); - if( op_data.status == SUCCEEDED ) { - return op_data.bypass_t; - } - return NULL; - } - - graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) { - if( my_max_concurrency == 0 ) { - return apply_body_bypass(t); - } else { - operation_type check_op(t, occupy_concurrency); - my_aggregator.execute(&check_op); - if( check_op.status == SUCCEEDED ) { - return apply_body_bypass(t); - } - return internal_try_put_bypass(t); - } - } - - graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) { - if( my_max_concurrency == 0 ) { - return create_body_task(t); - } else { - return internal_try_put_bypass(t); - } - } - - //! Applies the body to the provided input - // then decides if more work is available - graph_task* apply_body_bypass( const input_type &i ) { - return static_cast<ImplType *>(this)->apply_body_impl_bypass(i); - } - - //! allocates a task to apply a body - graph_task* create_body_task( const input_type &input ) { - if (!is_graph_active(my_graph_ref)) { - return nullptr; - } - // TODO revamp: extract helper for common graph task allocation part - small_object_allocator allocator{}; - typedef apply_body_task_bypass<class_type, input_type> task_type; - graph_task* t = allocator.new_object<task_type>( my_graph_ref, allocator, *this, input, my_priority ); - graph_reference().reserve_wait(); - return t; - } - - //! This is executed by an enqueued task, the "forwarder" - graph_task* forward_task() { - operation_type op_data(try_fwd); - graph_task* rval = NULL; - do { - op_data.status = WAIT; - my_aggregator.execute(&op_data); - if(op_data.status == SUCCEEDED) { - graph_task* ttask = op_data.bypass_t; - __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, NULL ); - rval = combine_tasks(my_graph_ref, rval, ttask); - } - } while (op_data.status == SUCCEEDED); - return rval; - } - - inline graph_task* create_forward_task() { - if (!is_graph_active(my_graph_ref)) { - return nullptr; - } - small_object_allocator allocator{}; - typedef forward_task_bypass<class_type> task_type; - graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority ); - graph_reference().reserve_wait(); - return t; - } - - //! 
Spawns a task that calls forward() - inline void spawn_forward_task() { - graph_task* tp = create_forward_task(); - if(tp) { - spawn_in_graph_arena(graph_reference(), *tp); - } - } - - node_priority_t priority() const override { return my_priority; } -}; // function_input_base - -//! Implements methods for a function node that takes a type Input as input and sends -// a type Output to its successors. -template< typename Input, typename Output, typename Policy, typename A> -class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > { -public: - typedef Input input_type; - typedef Output output_type; - typedef function_body<input_type, output_type> function_body_type; - typedef function_input<Input, Output, Policy,A> my_class; - typedef function_input_base<Input, Policy, A, my_class> base_type; - typedef function_input_queue<input_type, A> input_queue_type; - - // constructor - template<typename Body> - function_input( - graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority ) - : base_type(g, max_concurrency, a_priority) - , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) - , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { - } - - //! Copy constructor - function_input( const function_input& src ) : - base_type(src), - my_body( src.my_init_body->clone() ), - my_init_body(src.my_init_body->clone() ) { - } -#if __INTEL_COMPILER <= 2021 - // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited - // class while the parent class has the virtual keyword for the destrocutor. - virtual -#endif - ~function_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - function_body_type &body_ref = *this->my_body; - return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); - } - - output_type apply_body_impl( const input_type& i) { - // There is an extra copied needed to capture the - // body execution without the try_put - fgt_begin_body( my_body ); - output_type v = (*my_body)(i); - fgt_end_body( my_body ); - return v; - } - - //TODO: consider moving into the base class - graph_task* apply_body_impl_bypass( const input_type &i) { - output_type v = apply_body_impl(i); - graph_task* postponed_task = NULL; - if( base_type::my_max_concurrency != 0 ) { - postponed_task = base_type::try_get_postponed_task(i); - __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, NULL ); - } - if( postponed_task ) { - // make the task available for other workers since we do not know successors' - // execution policy - spawn_in_graph_arena(base_type::graph_reference(), *postponed_task); - } - graph_task* successor_task = successors().try_put_task(v); -#if _MSC_VER && !__INTEL_COMPILER -#pragma warning (push) -#pragma warning (disable: 4127) /* suppress conditional expression is constant */ -#endif - if(has_policy<lightweight, Policy>::value) { -#if _MSC_VER && !__INTEL_COMPILER -#pragma warning (pop) -#endif - if(!successor_task) { - // Return confirmative status since current - // node's body has been executed anyway - successor_task = SUCCESSFULLY_ENQUEUED; - } - } - return successor_task; - } - -protected: - - void reset_function_input(reset_flags f) { - base_type::reset_function_input_base(f); - if(f & rf_reset_bodies) { - function_body_type *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - } - - function_body_type 
*my_body; - function_body_type *my_init_body; - virtual broadcast_cache<output_type > &successors() = 0; - -}; // function_input - - -// helper templates to clear the successor edges of the output ports of an multifunction_node -template<int N> struct clear_element { - template<typename P> static void clear_this(P &p) { - (void)std::get<N-1>(p).successors().clear(); - clear_element<N-1>::clear_this(p); - } -#if TBB_USE_ASSERT - template<typename P> static bool this_empty(P &p) { - if(std::get<N-1>(p).successors().empty()) - return clear_element<N-1>::this_empty(p); - return false; - } -#endif -}; - -template<> struct clear_element<1> { - template<typename P> static void clear_this(P &p) { - (void)std::get<0>(p).successors().clear(); - } -#if TBB_USE_ASSERT - template<typename P> static bool this_empty(P &p) { - return std::get<0>(p).successors().empty(); - } -#endif -}; - -template <typename OutputTuple> -struct init_output_ports { - template <typename... Args> - static OutputTuple call(graph& g, const std::tuple<Args...>&) { - return OutputTuple(Args(g)...); - } -}; // struct init_output_ports - -//! Implements methods for a function node that takes a type Input as input -// and has a tuple of output ports specified. -template< typename Input, typename OutputPortSet, typename Policy, typename A> -class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > { -public: - static const int N = std::tuple_size<OutputPortSet>::value; - typedef Input input_type; - typedef OutputPortSet output_ports_type; - typedef multifunction_body<input_type, output_ports_type> multifunction_body_type; - typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class; - typedef function_input_base<Input, Policy, A, my_class> base_type; - typedef function_input_queue<input_type, A> input_queue_type; - - // constructor - template<typename Body> - multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority ) - : base_type(g, max_concurrency, a_priority) - , my_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) - , my_init_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) - , my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){ - } - - //! Copy constructor - multifunction_input( const multifunction_input& src ) : - base_type(src), - my_body( src.my_init_body->clone() ), - my_init_body(src.my_init_body->clone() ), - my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) { - } - - ~multifunction_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - multifunction_body_type &body_ref = *this->my_body; - return *static_cast<Body*>(dynamic_cast< multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr()); - } - - // for multifunction nodes we do not have a single successor as such. So we just tell - // the task we were successful. - //TODO: consider moving common parts with implementation in function_input into separate function - graph_task* apply_body_impl_bypass( const input_type &i ) { - fgt_begin_body( my_body ); - (*my_body)(i, my_output_ports); - fgt_end_body( my_body ); - graph_task* ttask = NULL; - if(base_type::my_max_concurrency != 0) { - ttask = base_type::try_get_postponed_task(i); - } - return ttask ? 
ttask : SUCCESSFULLY_ENQUEUED; - } - - output_ports_type &output_ports(){ return my_output_ports; } - -protected: - - void reset(reset_flags f) { - base_type::reset_function_input_base(f); - if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports); - if(f & rf_reset_bodies) { - multifunction_body_type* tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed"); - } - - multifunction_body_type *my_body; - multifunction_body_type *my_init_body; - output_ports_type my_output_ports; - -}; // multifunction_input - -// template to refer to an output port of a multifunction_node -template<size_t N, typename MOP> -typename std::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) { - return std::get<N>(op.output_ports()); -} - -inline void check_task_and_spawn(graph& g, graph_task* t) { - if (t && t != SUCCESSFULLY_ENQUEUED) { - spawn_in_graph_arena(g, *t); - } -} - -// helper structs for split_node -template<int N> -struct emit_element { - template<typename T, typename P> - static graph_task* emit_this(graph& g, const T &t, P &p) { - // TODO: consider to collect all the tasks in task_list and spawn them all at once - graph_task* last_task = std::get<N-1>(p).try_put_task(std::get<N-1>(t)); - check_task_and_spawn(g, last_task); - return emit_element<N-1>::emit_this(g,t,p); - } -}; - -template<> -struct emit_element<1> { - template<typename T, typename P> - static graph_task* emit_this(graph& g, const T &t, P &p) { - graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t)); - check_task_and_spawn(g, last_task); - return SUCCESSFULLY_ENQUEUED; - } -}; - -//! Implements methods for an executable node that takes continue_msg as input -template< typename Output, typename Policy> -class continue_input : public continue_receiver { -public: - - //! The input type of this receiver - typedef continue_msg input_type; - - //! 
The output type of this receiver - typedef Output output_type; - typedef function_body<input_type, output_type> function_body_type; - typedef continue_input<output_type, Policy> class_type; - - template< typename Body > - continue_input( graph &g, Body& body, node_priority_t a_priority ) - : continue_receiver(/*number_of_predecessors=*/0, a_priority) - , my_graph_ref(g) - , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) - , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) - { } - - template< typename Body > - continue_input( graph &g, int number_of_predecessors, - Body& body, node_priority_t a_priority ) - : continue_receiver( number_of_predecessors, a_priority ) - , my_graph_ref(g) - , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) - , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) - { } - - continue_input( const continue_input& src ) : continue_receiver(src), - my_graph_ref(src.my_graph_ref), - my_body( src.my_init_body->clone() ), - my_init_body( src.my_init_body->clone() ) {} - - ~continue_input() { - delete my_body; - delete my_init_body; - } - - template< typename Body > - Body copy_function_object() { - function_body_type &body_ref = *my_body; - return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); - } - - void reset_receiver( reset_flags f) override { - continue_receiver::reset_receiver(f); - if(f & rf_reset_bodies) { - function_body_type *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - } - -protected: - - graph& my_graph_ref; - function_body_type *my_body; - function_body_type *my_init_body; - - virtual broadcast_cache<output_type > &successors() = 0; - - friend class apply_body_task_bypass< class_type, continue_msg >; - - //! Applies the body to the provided input - graph_task* apply_body_bypass( input_type ) { - // There is an extra copied needed to capture the - // body execution without the try_put - fgt_begin_body( my_body ); - output_type v = (*my_body)( continue_msg() ); - fgt_end_body( my_body ); - return successors().try_put_task( v ); - } - - graph_task* execute() override { - if(!is_graph_active(my_graph_ref)) { - return NULL; - } -#if _MSC_VER && !__INTEL_COMPILER -#pragma warning (push) -#pragma warning (disable: 4127) /* suppress conditional expression is constant */ -#endif - if(has_policy<lightweight, Policy>::value) { -#if _MSC_VER && !__INTEL_COMPILER -#pragma warning (pop) -#endif - return apply_body_bypass( continue_msg() ); - } - else { - small_object_allocator allocator{}; - typedef apply_body_task_bypass<class_type, continue_msg> task_type; - graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority ); - graph_reference().reserve_wait(); - return t; - } - } - - graph& graph_reference() const override { - return my_graph_ref; - } -}; // continue_input - -//! Implements methods for both executable and function nodes that puts Output to its successors -template< typename Output > -class function_output : public sender<Output> { -public: - - template<int N> friend struct clear_element; - typedef Output output_type; - typedef typename sender<output_type>::successor_type successor_type; - typedef broadcast_cache<output_type> broadcast_cache_type; - - function_output(graph& g) : my_successors(this), my_graph_ref(g) {} - function_output(const function_output& other) = delete; - - //! 
Adds a new successor to this node - bool register_successor( successor_type &r ) override { - successors().register_successor( r ); - return true; - } - - //! Removes a successor from this node - bool remove_successor( successor_type &r ) override { - successors().remove_successor( r ); - return true; - } - - broadcast_cache_type &successors() { return my_successors; } - - graph& graph_reference() const { return my_graph_ref; } -protected: - broadcast_cache_type my_successors; - graph& my_graph_ref; -}; // function_output - -template< typename Output > -class multifunction_output : public function_output<Output> { -public: - typedef Output output_type; - typedef function_output<output_type> base_type; - using base_type::my_successors; - - multifunction_output(graph& g) : base_type(g) {} - multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {} - - bool try_put(const output_type &i) { - graph_task *res = try_put_task(i); - if( !res ) return false; - if( res != SUCCESSFULLY_ENQUEUED ) { - // wrapping in task_arena::execute() is not needed since the method is called from - // inside task::execute() - spawn_in_graph_arena(graph_reference(), *res); - } - return true; - } - - using base_type::graph_reference; - -protected: - - graph_task* try_put_task(const output_type &i) { - return my_successors.try_put_task(i); - } - - template <int N> friend struct emit_element; - -}; // multifunction_output - -//composite_node -template<typename CompositeType> -void add_nodes_impl(CompositeType*, bool) {} - -template< typename CompositeType, typename NodeType1, typename... NodeTypes > -void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) { - void *addr = const_cast<NodeType1 *>(&n1); - - fgt_alias_port(c_node, addr, visible); - add_nodes_impl(c_node, visible, n...); -} - -#endif // __TBB__flow_graph_node_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_node_impl_H +#define __TBB__flow_graph_node_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_flow_graph_item_buffer_impl.h" + +template< typename T, typename A > +class function_input_queue : public item_buffer<T,A> { +public: + bool empty() const { + return this->buffer_empty(); + } + + const T& front() const { + return this->item_buffer<T, A>::front(); + } + + void pop() { + this->destroy_front(); + } + + bool push( T& t ) { + return this->push_back( t ); + } +}; + +//! Input and scheduling for a function node that takes a type Input as input +// The only up-ref is apply_body_impl, which should implement the function +// call and any handling of the result. 
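[Editorial aside, not part of the diff.] The multifunction_input / multifunction_output plumbing in this header is what the public multifunction_node and the output_port<N> helper are built on: the body writes to the tuple of output ports, and each std::get<N>(ports).try_put(...) goes through multifunction_output. A minimal sketch, assuming a hypothetical even/odd splitter:

// sketch only -- the even/odd split and sink bodies are hypothetical
#include <cstdio>
#include <tuple>
#include "oneapi/tbb/flow_graph.h"

int main() {
    using namespace tbb::flow;
    graph g;

    using splitter_t = multifunction_node<int, std::tuple<int, int>>;

    // The body receives the input and the tuple of output ports.
    splitter_t splitter(g, unlimited,
        [](const int& v, splitter_t::output_ports_type& ports) {
            if (v % 2 == 0) std::get<0>(ports).try_put(v);
            else            std::get<1>(ports).try_put(v);
        });

    function_node<int> even_sink(g, serial, [](const int& v) -> continue_msg {
        std::printf("even: %d\n", v); return continue_msg();
    });
    function_node<int> odd_sink(g, serial, [](const int& v) -> continue_msg {
        std::printf("odd: %d\n", v); return continue_msg();
    });

    // output_port<N>() (defined in this header) selects the N-th port
    // when building edges.
    make_edge(output_port<0>(splitter), even_sink);
    make_edge(output_port<1>(splitter), odd_sink);

    for (int i = 0; i < 6; ++i) splitter.try_put(i);
    g.wait_for_all();
    return 0;
}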
+template< typename Input, typename Policy, typename A, typename ImplType > +class function_input_base : public receiver<Input>, no_assign { + enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency + }; + typedef function_input_base<Input, Policy, A, ImplType> class_type; + +public: + + //! The input type of this receiver + typedef Input input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type; + typedef function_input_queue<input_type, A> input_queue_type; + typedef typename allocator_traits<A>::template rebind_alloc<input_queue_type> allocator_type; + static_assert(!has_policy<queueing, Policy>::value || !has_policy<rejecting, Policy>::value, ""); + + //! Constructor for function_input_base + function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority ) + : my_graph_ref(g), my_max_concurrency(max_concurrency) + , my_concurrency(0), my_priority(a_priority) + , my_queue(!has_policy<rejecting, Policy>::value ? new input_queue_type() : NULL) + , my_predecessors(this) + , forwarder_busy(false) + { + my_aggregator.initialize_handler(handler_type(this)); + } + + //! Copy constructor + function_input_base( const function_input_base& src ) + : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority) {} + + //! Destructor + // The queue is allocated by the constructor for {multi}function_node. + // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead. + // This would be an interface-breaking change. + virtual ~function_input_base() { + if ( my_queue ) delete my_queue; + } + + graph_task* try_put_task( const input_type& t) override { + return try_put_task_impl(t, has_policy<lightweight, Policy>()); + } + + //! Adds src to the list of cached predecessors. + bool register_predecessor( predecessor_type &src ) override { + operation_type op_data(reg_pred); + op_data.r = &src; + my_aggregator.execute(&op_data); + return true; + } + + //! Removes src from the list of cached predecessors. 
+ bool remove_predecessor( predecessor_type &src ) override { + operation_type op_data(rem_pred); + op_data.r = &src; + my_aggregator.execute(&op_data); + return true; + } + +protected: + + void reset_function_input_base( reset_flags f) { + my_concurrency = 0; + if(my_queue) { + my_queue->reset(); + } + reset_receiver(f); + forwarder_busy = false; + } + + graph& my_graph_ref; + const size_t my_max_concurrency; + size_t my_concurrency; + node_priority_t my_priority; + input_queue_type *my_queue; + predecessor_cache<input_type, null_mutex > my_predecessors; + + void reset_receiver( reset_flags f) { + if( f & rf_clear_edges) my_predecessors.clear(); + else + my_predecessors.reset(); + __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed"); + } + + graph& graph_reference() const override { + return my_graph_ref; + } + + graph_task* try_get_postponed_task(const input_type& i) { + operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + +private: + + friend class apply_body_task_bypass< class_type, input_type >; + friend class forward_task_bypass< class_type >; + + class operation_type : public aggregated_operation< operation_type > { + public: + char type; + union { + input_type *elem; + predecessor_type *r; + }; + graph_task* bypass_t; + operation_type(const input_type& e, op_type t) : + type(char(t)), elem(const_cast<input_type*>(&e)) {} + operation_type(op_type t) : type(char(t)), r(NULL) {} + }; + + bool forwarder_busy; + typedef aggregating_functor<class_type, operation_type> handler_type; + friend class aggregating_functor<class_type, operation_type>; + aggregator< handler_type, operation_type > my_aggregator; + + graph_task* perform_queued_requests() { + graph_task* new_task = NULL; + if(my_queue) { + if(!my_queue->empty()) { + ++my_concurrency; + new_task = create_body_task(my_queue->front()); + + my_queue->pop(); + } + } + else { + input_type i; + if(my_predecessors.get_item(i)) { + ++my_concurrency; + new_task = create_body_task(i); + } + } + return new_task; + } + void handle_operations(operation_type *op_list) { + operation_type* tmp; + while (op_list) { + tmp = op_list; + op_list = op_list->next; + switch (tmp->type) { + case reg_pred: + my_predecessors.add(*(tmp->r)); + tmp->status.store(SUCCEEDED, std::memory_order_release); + if (!forwarder_busy) { + forwarder_busy = true; + spawn_forward_task(); + } + break; + case rem_pred: + my_predecessors.remove(*(tmp->r)); + tmp->status.store(SUCCEEDED, std::memory_order_release); + break; + case app_body_bypass: { + tmp->bypass_t = NULL; + __TBB_ASSERT(my_max_concurrency != 0, NULL); + --my_concurrency; + if(my_concurrency<my_max_concurrency) + tmp->bypass_t = perform_queued_requests(); + tmp->status.store(SUCCEEDED, std::memory_order_release); + } + break; + case tryput_bypass: internal_try_put_task(tmp); break; + case try_fwd: internal_forward(tmp); break; + case occupy_concurrency: + if (my_concurrency < my_max_concurrency) { + ++my_concurrency; + tmp->status.store(SUCCEEDED, std::memory_order_release); + } else { + tmp->status.store(FAILED, std::memory_order_release); + } + break; + } + } + } + + //! 
Put to the node, but return the task instead of enqueueing it + void internal_try_put_task(operation_type *op) { + __TBB_ASSERT(my_max_concurrency != 0, NULL); + if (my_concurrency < my_max_concurrency) { + ++my_concurrency; + graph_task * new_task = create_body_task(*(op->elem)); + op->bypass_t = new_task; + op->status.store(SUCCEEDED, std::memory_order_release); + } else if ( my_queue && my_queue->push(*(op->elem)) ) { + op->bypass_t = SUCCESSFULLY_ENQUEUED; + op->status.store(SUCCEEDED, std::memory_order_release); + } else { + op->bypass_t = NULL; + op->status.store(FAILED, std::memory_order_release); + } + } + + //! Creates tasks for postponed messages if available and if concurrency allows + void internal_forward(operation_type *op) { + op->bypass_t = NULL; + if (my_concurrency < my_max_concurrency) + op->bypass_t = perform_queued_requests(); + if(op->bypass_t) + op->status.store(SUCCEEDED, std::memory_order_release); + else { + forwarder_busy = false; + op->status.store(FAILED, std::memory_order_release); + } + } + + graph_task* internal_try_put_bypass( const input_type& t ) { + operation_type op_data(t, tryput_bypass); + my_aggregator.execute(&op_data); + if( op_data.status == SUCCEEDED ) { + return op_data.bypass_t; + } + return NULL; + } + + graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) { + if( my_max_concurrency == 0 ) { + return apply_body_bypass(t); + } else { + operation_type check_op(t, occupy_concurrency); + my_aggregator.execute(&check_op); + if( check_op.status == SUCCEEDED ) { + return apply_body_bypass(t); + } + return internal_try_put_bypass(t); + } + } + + graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) { + if( my_max_concurrency == 0 ) { + return create_body_task(t); + } else { + return internal_try_put_bypass(t); + } + } + + //! Applies the body to the provided input + // then decides if more work is available + graph_task* apply_body_bypass( const input_type &i ) { + return static_cast<ImplType *>(this)->apply_body_impl_bypass(i); + } + + //! allocates a task to apply a body + graph_task* create_body_task( const input_type &input ) { + if (!is_graph_active(my_graph_ref)) { + return nullptr; + } + // TODO revamp: extract helper for common graph task allocation part + small_object_allocator allocator{}; + typedef apply_body_task_bypass<class_type, input_type> task_type; + graph_task* t = allocator.new_object<task_type>( my_graph_ref, allocator, *this, input, my_priority ); + graph_reference().reserve_wait(); + return t; + } + + //! This is executed by an enqueued task, the "forwarder" + graph_task* forward_task() { + operation_type op_data(try_fwd); + graph_task* rval = NULL; + do { + op_data.status = WAIT; + my_aggregator.execute(&op_data); + if(op_data.status == SUCCEEDED) { + graph_task* ttask = op_data.bypass_t; + __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, NULL ); + rval = combine_tasks(my_graph_ref, rval, ttask); + } + } while (op_data.status == SUCCEEDED); + return rval; + } + + inline graph_task* create_forward_task() { + if (!is_graph_active(my_graph_ref)) { + return nullptr; + } + small_object_allocator allocator{}; + typedef forward_task_bypass<class_type> task_type; + graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority ); + graph_reference().reserve_wait(); + return t; + } + + //! 
Spawns a task that calls forward() + inline void spawn_forward_task() { + graph_task* tp = create_forward_task(); + if(tp) { + spawn_in_graph_arena(graph_reference(), *tp); + } + } + + node_priority_t priority() const override { return my_priority; } +}; // function_input_base + +//! Implements methods for a function node that takes a type Input as input and sends +// a type Output to its successors. +template< typename Input, typename Output, typename Policy, typename A> +class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > { +public: + typedef Input input_type; + typedef Output output_type; + typedef function_body<input_type, output_type> function_body_type; + typedef function_input<Input, Output, Policy,A> my_class; + typedef function_input_base<Input, Policy, A, my_class> base_type; + typedef function_input_queue<input_type, A> input_queue_type; + + // constructor + template<typename Body> + function_input( + graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority ) + : base_type(g, max_concurrency, a_priority) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { + } + + //! Copy constructor + function_input( const function_input& src ) : + base_type(src), + my_body( src.my_init_body->clone() ), + my_init_body(src.my_init_body->clone() ) { + } +#if __INTEL_COMPILER <= 2021 + // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited + // class while the parent class has the virtual keyword for the destrocutor. + virtual +#endif + ~function_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + function_body_type &body_ref = *this->my_body; + return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); + } + + output_type apply_body_impl( const input_type& i) { + // There is an extra copied needed to capture the + // body execution without the try_put + fgt_begin_body( my_body ); + output_type v = (*my_body)(i); + fgt_end_body( my_body ); + return v; + } + + //TODO: consider moving into the base class + graph_task* apply_body_impl_bypass( const input_type &i) { + output_type v = apply_body_impl(i); + graph_task* postponed_task = NULL; + if( base_type::my_max_concurrency != 0 ) { + postponed_task = base_type::try_get_postponed_task(i); + __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, NULL ); + } + if( postponed_task ) { + // make the task available for other workers since we do not know successors' + // execution policy + spawn_in_graph_arena(base_type::graph_reference(), *postponed_task); + } + graph_task* successor_task = successors().try_put_task(v); +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (push) +#pragma warning (disable: 4127) /* suppress conditional expression is constant */ +#endif + if(has_policy<lightweight, Policy>::value) { +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (pop) +#endif + if(!successor_task) { + // Return confirmative status since current + // node's body has been executed anyway + successor_task = SUCCESSFULLY_ENQUEUED; + } + } + return successor_task; + } + +protected: + + void reset_function_input(reset_flags f) { + base_type::reset_function_input_base(f); + if(f & rf_reset_bodies) { + function_body_type *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + + function_body_type 
*my_body; + function_body_type *my_init_body; + virtual broadcast_cache<output_type > &successors() = 0; + +}; // function_input + + +// helper templates to clear the successor edges of the output ports of an multifunction_node +template<int N> struct clear_element { + template<typename P> static void clear_this(P &p) { + (void)std::get<N-1>(p).successors().clear(); + clear_element<N-1>::clear_this(p); + } +#if TBB_USE_ASSERT + template<typename P> static bool this_empty(P &p) { + if(std::get<N-1>(p).successors().empty()) + return clear_element<N-1>::this_empty(p); + return false; + } +#endif +}; + +template<> struct clear_element<1> { + template<typename P> static void clear_this(P &p) { + (void)std::get<0>(p).successors().clear(); + } +#if TBB_USE_ASSERT + template<typename P> static bool this_empty(P &p) { + return std::get<0>(p).successors().empty(); + } +#endif +}; + +template <typename OutputTuple> +struct init_output_ports { + template <typename... Args> + static OutputTuple call(graph& g, const std::tuple<Args...>&) { + return OutputTuple(Args(g)...); + } +}; // struct init_output_ports + +//! Implements methods for a function node that takes a type Input as input +// and has a tuple of output ports specified. +template< typename Input, typename OutputPortSet, typename Policy, typename A> +class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > { +public: + static const int N = std::tuple_size<OutputPortSet>::value; + typedef Input input_type; + typedef OutputPortSet output_ports_type; + typedef multifunction_body<input_type, output_ports_type> multifunction_body_type; + typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class; + typedef function_input_base<Input, Policy, A, my_class> base_type; + typedef function_input_queue<input_type, A> input_queue_type; + + // constructor + template<typename Body> + multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority ) + : base_type(g, max_concurrency, a_priority) + , my_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) + , my_init_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) + , my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){ + } + + //! Copy constructor + multifunction_input( const multifunction_input& src ) : + base_type(src), + my_body( src.my_init_body->clone() ), + my_init_body(src.my_init_body->clone() ), + my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) { + } + + ~multifunction_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + multifunction_body_type &body_ref = *this->my_body; + return *static_cast<Body*>(dynamic_cast< multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr()); + } + + // for multifunction nodes we do not have a single successor as such. So we just tell + // the task we were successful. + //TODO: consider moving common parts with implementation in function_input into separate function + graph_task* apply_body_impl_bypass( const input_type &i ) { + fgt_begin_body( my_body ); + (*my_body)(i, my_output_ports); + fgt_end_body( my_body ); + graph_task* ttask = NULL; + if(base_type::my_max_concurrency != 0) { + ttask = base_type::try_get_postponed_task(i); + } + return ttask ? 
ttask : SUCCESSFULLY_ENQUEUED; + } + + output_ports_type &output_ports(){ return my_output_ports; } + +protected: + + void reset(reset_flags f) { + base_type::reset_function_input_base(f); + if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports); + if(f & rf_reset_bodies) { + multifunction_body_type* tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed"); + } + + multifunction_body_type *my_body; + multifunction_body_type *my_init_body; + output_ports_type my_output_ports; + +}; // multifunction_input + +// template to refer to an output port of a multifunction_node +template<size_t N, typename MOP> +typename std::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) { + return std::get<N>(op.output_ports()); +} + +inline void check_task_and_spawn(graph& g, graph_task* t) { + if (t && t != SUCCESSFULLY_ENQUEUED) { + spawn_in_graph_arena(g, *t); + } +} + +// helper structs for split_node +template<int N> +struct emit_element { + template<typename T, typename P> + static graph_task* emit_this(graph& g, const T &t, P &p) { + // TODO: consider to collect all the tasks in task_list and spawn them all at once + graph_task* last_task = std::get<N-1>(p).try_put_task(std::get<N-1>(t)); + check_task_and_spawn(g, last_task); + return emit_element<N-1>::emit_this(g,t,p); + } +}; + +template<> +struct emit_element<1> { + template<typename T, typename P> + static graph_task* emit_this(graph& g, const T &t, P &p) { + graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t)); + check_task_and_spawn(g, last_task); + return SUCCESSFULLY_ENQUEUED; + } +}; + +//! Implements methods for an executable node that takes continue_msg as input +template< typename Output, typename Policy> +class continue_input : public continue_receiver { +public: + + //! The input type of this receiver + typedef continue_msg input_type; + + //! 
The output type of this receiver + typedef Output output_type; + typedef function_body<input_type, output_type> function_body_type; + typedef continue_input<output_type, Policy> class_type; + + template< typename Body > + continue_input( graph &g, Body& body, node_priority_t a_priority ) + : continue_receiver(/*number_of_predecessors=*/0, a_priority) + , my_graph_ref(g) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) + { } + + template< typename Body > + continue_input( graph &g, int number_of_predecessors, + Body& body, node_priority_t a_priority ) + : continue_receiver( number_of_predecessors, a_priority ) + , my_graph_ref(g) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) + { } + + continue_input( const continue_input& src ) : continue_receiver(src), + my_graph_ref(src.my_graph_ref), + my_body( src.my_init_body->clone() ), + my_init_body( src.my_init_body->clone() ) {} + + ~continue_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + function_body_type &body_ref = *my_body; + return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); + } + + void reset_receiver( reset_flags f) override { + continue_receiver::reset_receiver(f); + if(f & rf_reset_bodies) { + function_body_type *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + +protected: + + graph& my_graph_ref; + function_body_type *my_body; + function_body_type *my_init_body; + + virtual broadcast_cache<output_type > &successors() = 0; + + friend class apply_body_task_bypass< class_type, continue_msg >; + + //! Applies the body to the provided input + graph_task* apply_body_bypass( input_type ) { + // There is an extra copied needed to capture the + // body execution without the try_put + fgt_begin_body( my_body ); + output_type v = (*my_body)( continue_msg() ); + fgt_end_body( my_body ); + return successors().try_put_task( v ); + } + + graph_task* execute() override { + if(!is_graph_active(my_graph_ref)) { + return NULL; + } +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (push) +#pragma warning (disable: 4127) /* suppress conditional expression is constant */ +#endif + if(has_policy<lightweight, Policy>::value) { +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (pop) +#endif + return apply_body_bypass( continue_msg() ); + } + else { + small_object_allocator allocator{}; + typedef apply_body_task_bypass<class_type, continue_msg> task_type; + graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority ); + graph_reference().reserve_wait(); + return t; + } + } + + graph& graph_reference() const override { + return my_graph_ref; + } +}; // continue_input + +//! Implements methods for both executable and function nodes that puts Output to its successors +template< typename Output > +class function_output : public sender<Output> { +public: + + template<int N> friend struct clear_element; + typedef Output output_type; + typedef typename sender<output_type>::successor_type successor_type; + typedef broadcast_cache<output_type> broadcast_cache_type; + + function_output(graph& g) : my_successors(this), my_graph_ref(g) {} + function_output(const function_output& other) = delete; + + //! 
Adds a new successor to this node + bool register_successor( successor_type &r ) override { + successors().register_successor( r ); + return true; + } + + //! Removes a successor from this node + bool remove_successor( successor_type &r ) override { + successors().remove_successor( r ); + return true; + } + + broadcast_cache_type &successors() { return my_successors; } + + graph& graph_reference() const { return my_graph_ref; } +protected: + broadcast_cache_type my_successors; + graph& my_graph_ref; +}; // function_output + +template< typename Output > +class multifunction_output : public function_output<Output> { +public: + typedef Output output_type; + typedef function_output<output_type> base_type; + using base_type::my_successors; + + multifunction_output(graph& g) : base_type(g) {} + multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {} + + bool try_put(const output_type &i) { + graph_task *res = try_put_task(i); + if( !res ) return false; + if( res != SUCCESSFULLY_ENQUEUED ) { + // wrapping in task_arena::execute() is not needed since the method is called from + // inside task::execute() + spawn_in_graph_arena(graph_reference(), *res); + } + return true; + } + + using base_type::graph_reference; + +protected: + + graph_task* try_put_task(const output_type &i) { + return my_successors.try_put_task(i); + } + + template <int N> friend struct emit_element; + +}; // multifunction_output + +//composite_node +template<typename CompositeType> +void add_nodes_impl(CompositeType*, bool) {} + +template< typename CompositeType, typename NodeType1, typename... NodeTypes > +void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) { + void *addr = const_cast<NodeType1 *>(&n1); + + fgt_alias_port(c_node, addr, visible); + add_nodes_impl(c_node, visible, n...); +} + +#endif // __TBB__flow_graph_node_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h index ce867121f9..24f720f816 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h @@ -1,265 +1,265 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_flow_graph_node_set_impl_H -#define __TBB_flow_graph_node_set_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. 
-#endif - -// Included in namespace tbb::detail::d1 (in flow_graph.h) - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -// Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get -// Seems like the well-formed expression in trailing decltype is treated as ill-formed -// TODO: investigate problems with decltype in trailing return types or find the cross-platform solution -#define __TBB_MSVC_DISABLE_TRAILING_DECLTYPE (_MSC_VER >= 1900) - -namespace order { -struct undefined {}; -struct following {}; -struct preceding {}; -} - -class get_graph_helper { -public: - // TODO: consider making graph_reference() public and consistent interface to get a reference to the graph - // and remove get_graph_helper - template <typename T> - static graph& get(const T& object) { - return get_impl(object, std::is_base_of<graph_node, T>()); - } - -private: - // Get graph from the object of type derived from graph_node - template <typename T> - static graph& get_impl(const T& object, std::true_type) { - return static_cast<const graph_node*>(&object)->my_graph; - } - - template <typename T> - static graph& get_impl(const T& object, std::false_type) { - return object.graph_reference(); - } -}; - -template<typename Order, typename... Nodes> -struct node_set { - typedef Order order_type; - - std::tuple<Nodes&...> nodes; - node_set(Nodes&... ns) : nodes(ns...) {} - - template <typename... Nodes2> - node_set(const node_set<order::undefined, Nodes2...>& set) : nodes(set.nodes) {} - - graph& graph_reference() const { - return get_graph_helper::get(std::get<0>(nodes)); - } -}; - -namespace alias_helpers { -template <typename T> using output_type = typename T::output_type; -template <typename T> using output_ports_type = typename T::output_ports_type; -template <typename T> using input_type = typename T::input_type; -template <typename T> using input_ports_type = typename T::input_ports_type; -} // namespace alias_helpers - -template <typename T> -using has_output_type = supports<T, alias_helpers::output_type>; - -template <typename T> -using has_input_type = supports<T, alias_helpers::input_type>; - -template <typename T> -using has_input_ports_type = supports<T, alias_helpers::input_ports_type>; - -template <typename T> -using has_output_ports_type = supports<T, alias_helpers::output_ports_type>; - -template<typename T> -struct is_sender : std::is_base_of<sender<typename T::output_type>, T> {}; - -template<typename T> -struct is_receiver : std::is_base_of<receiver<typename T::input_type>, T> {}; - -template <typename Node> -struct is_async_node : std::false_type {}; - -template <typename... Args> -struct is_async_node<async_node<Args...>> : std::true_type {}; - -template<typename FirstPredecessor, typename... Predecessors> -node_set<order::following, FirstPredecessor, Predecessors...> -follows(FirstPredecessor& first_predecessor, Predecessors&... predecessors) { - static_assert((conjunction<has_output_type<FirstPredecessor>, - has_output_type<Predecessors>...>::value), - "Not all node's predecessors has output_type typedef"); - static_assert((conjunction<is_sender<FirstPredecessor>, is_sender<Predecessors>...>::value), - "Not all node's predecessors are senders"); - return node_set<order::following, FirstPredecessor, Predecessors...>(first_predecessor, predecessors...); -} - -template<typename... 
Predecessors> -node_set<order::following, Predecessors...> -follows(node_set<order::undefined, Predecessors...>& predecessors_set) { - static_assert((conjunction<has_output_type<Predecessors>...>::value), - "Not all nodes in the set has output_type typedef"); - static_assert((conjunction<is_sender<Predecessors>...>::value), - "Not all nodes in the set are senders"); - return node_set<order::following, Predecessors...>(predecessors_set); -} - -template<typename FirstSuccessor, typename... Successors> -node_set<order::preceding, FirstSuccessor, Successors...> -precedes(FirstSuccessor& first_successor, Successors&... successors) { - static_assert((conjunction<has_input_type<FirstSuccessor>, - has_input_type<Successors>...>::value), - "Not all node's successors has input_type typedef"); - static_assert((conjunction<is_receiver<FirstSuccessor>, is_receiver<Successors>...>::value), - "Not all node's successors are receivers"); - return node_set<order::preceding, FirstSuccessor, Successors...>(first_successor, successors...); -} - -template<typename... Successors> -node_set<order::preceding, Successors...> -precedes(node_set<order::undefined, Successors...>& successors_set) { - static_assert((conjunction<has_input_type<Successors>...>::value), - "Not all nodes in the set has input_type typedef"); - static_assert((conjunction<is_receiver<Successors>...>::value), - "Not all nodes in the set are receivers"); - return node_set<order::preceding, Successors...>(successors_set); -} - -template <typename Node, typename... Nodes> -node_set<order::undefined, Node, Nodes...> -make_node_set(Node& first_node, Nodes&... nodes) { - return node_set<order::undefined, Node, Nodes...>(first_node, nodes...); -} - -template<size_t I> -class successor_selector { - template <typename NodeType> - static auto get_impl(NodeType& node, std::true_type) -> decltype(input_port<I>(node)) { - return input_port<I>(node); - } - - template <typename NodeType> - static NodeType& get_impl(NodeType& node, std::false_type) { return node; } - -public: - template <typename NodeType> -#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE - static auto& get(NodeType& node) -#else - static auto get(NodeType& node) -> decltype(get_impl(node, has_input_ports_type<NodeType>())) -#endif - { - return get_impl(node, has_input_ports_type<NodeType>()); - } -}; - -template<size_t I> -class predecessor_selector { - template <typename NodeType> - static auto internal_get(NodeType& node, std::true_type) -> decltype(output_port<I>(node)) { - return output_port<I>(node); - } - - template <typename NodeType> - static NodeType& internal_get(NodeType& node, std::false_type) { return node;} - - template <typename NodeType> -#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE - static auto& get_impl(NodeType& node, std::false_type) -#else - static auto get_impl(NodeType& node, std::false_type) -> decltype(internal_get(node, has_output_ports_type<NodeType>())) -#endif - { - return internal_get(node, has_output_ports_type<NodeType>()); - } - - template <typename AsyncNode> - static AsyncNode& get_impl(AsyncNode& node, std::true_type) { return node; } - -public: - template <typename NodeType> -#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE - static auto& get(NodeType& node) -#else - static auto get(NodeType& node) -> decltype(get_impl(node, is_async_node<NodeType>())) -#endif - { - return get_impl(node, is_async_node<NodeType>()); - } -}; - -template<size_t I> -class make_edges_helper { -public: - template<typename PredecessorsTuple, typename NodeType> - static void 
connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { - make_edge(std::get<I>(predecessors), successor_selector<I>::get(node)); - make_edges_helper<I - 1>::connect_predecessors(predecessors, node); - } - - template<typename SuccessorsTuple, typename NodeType> - static void connect_successors(NodeType& node, SuccessorsTuple& successors) { - make_edge(predecessor_selector<I>::get(node), std::get<I>(successors)); - make_edges_helper<I - 1>::connect_successors(node, successors); - } -}; - -template<> -struct make_edges_helper<0> { - template<typename PredecessorsTuple, typename NodeType> - static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { - make_edge(std::get<0>(predecessors), successor_selector<0>::get(node)); - } - - template<typename SuccessorsTuple, typename NodeType> - static void connect_successors(NodeType& node, SuccessorsTuple& successors) { - make_edge(predecessor_selector<0>::get(node), std::get<0>(successors)); - } -}; - -// TODO: consider adding an overload for making edges between node sets -template<typename NodeType, typename OrderFlagType, typename... Args> -void make_edges(const node_set<OrderFlagType, Args...>& s, NodeType& node) { - const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; - make_edges_helper<SetSize - 1>::connect_predecessors(s.nodes, node); -} - -template <typename NodeType, typename OrderFlagType, typename... Args> -void make_edges(NodeType& node, const node_set<OrderFlagType, Args...>& s) { - const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; - make_edges_helper<SetSize - 1>::connect_successors(node, s.nodes); -} - -template <typename NodeType, typename... Nodes> -void make_edges_in_order(const node_set<order::following, Nodes...>& ns, NodeType& node) { - make_edges(ns, node); -} - -template <typename NodeType, typename... Nodes> -void make_edges_in_order(const node_set<order::preceding, Nodes...>& ns, NodeType& node) { - make_edges(node, ns); -} - -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -#endif // __TBB_flow_graph_node_set_impl_H +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_node_set_impl_H +#define __TBB_flow_graph_node_set_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. 
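The follows/precedes/make_node_set helpers and the make_edges machinery defined in this header back the preview interface for wiring flow-graph edges at node-construction time. A minimal usage sketch follows, assuming the preview macro TBB_PREVIEW_FLOW_GRAPH_NODE_SET and the public tbb::flow node types referenced elsewhere in this diff; exact constructor overloads should be checked against these headers rather than taken from the sketch.

// Sketch only: hedged illustration of the preview node-set interface.
#define TBB_PREVIEW_FLOW_GRAPH_NODE_SET 1
#include <oneapi/tbb/flow_graph.h>

int main() {
    using namespace tbb::flow;
    graph g;
    broadcast_node<int> src(g);
    function_node<int, int> twice(g, unlimited, [](int v) { return 2 * v; });
    // follows(src, twice) builds a node_set<order::following, ...>; handing it to a
    // node constructor lets the node locate its graph (get_graph_helper) and create
    // the edges via make_edges_in_order, so no explicit make_edge calls are needed.
    function_node<int, int> sink(follows(src, twice), serial, [](int v) { return v; });
    src.try_put(21);
    g.wait_for_all();
    return 0;
}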
+#endif + +// Included in namespace tbb::detail::d1 (in flow_graph.h) + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +// Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get +// Seems like the well-formed expression in trailing decltype is treated as ill-formed +// TODO: investigate problems with decltype in trailing return types or find the cross-platform solution +#define __TBB_MSVC_DISABLE_TRAILING_DECLTYPE (_MSC_VER >= 1900) + +namespace order { +struct undefined {}; +struct following {}; +struct preceding {}; +} + +class get_graph_helper { +public: + // TODO: consider making graph_reference() public and consistent interface to get a reference to the graph + // and remove get_graph_helper + template <typename T> + static graph& get(const T& object) { + return get_impl(object, std::is_base_of<graph_node, T>()); + } + +private: + // Get graph from the object of type derived from graph_node + template <typename T> + static graph& get_impl(const T& object, std::true_type) { + return static_cast<const graph_node*>(&object)->my_graph; + } + + template <typename T> + static graph& get_impl(const T& object, std::false_type) { + return object.graph_reference(); + } +}; + +template<typename Order, typename... Nodes> +struct node_set { + typedef Order order_type; + + std::tuple<Nodes&...> nodes; + node_set(Nodes&... ns) : nodes(ns...) {} + + template <typename... Nodes2> + node_set(const node_set<order::undefined, Nodes2...>& set) : nodes(set.nodes) {} + + graph& graph_reference() const { + return get_graph_helper::get(std::get<0>(nodes)); + } +}; + +namespace alias_helpers { +template <typename T> using output_type = typename T::output_type; +template <typename T> using output_ports_type = typename T::output_ports_type; +template <typename T> using input_type = typename T::input_type; +template <typename T> using input_ports_type = typename T::input_ports_type; +} // namespace alias_helpers + +template <typename T> +using has_output_type = supports<T, alias_helpers::output_type>; + +template <typename T> +using has_input_type = supports<T, alias_helpers::input_type>; + +template <typename T> +using has_input_ports_type = supports<T, alias_helpers::input_ports_type>; + +template <typename T> +using has_output_ports_type = supports<T, alias_helpers::output_ports_type>; + +template<typename T> +struct is_sender : std::is_base_of<sender<typename T::output_type>, T> {}; + +template<typename T> +struct is_receiver : std::is_base_of<receiver<typename T::input_type>, T> {}; + +template <typename Node> +struct is_async_node : std::false_type {}; + +template <typename... Args> +struct is_async_node<async_node<Args...>> : std::true_type {}; + +template<typename FirstPredecessor, typename... Predecessors> +node_set<order::following, FirstPredecessor, Predecessors...> +follows(FirstPredecessor& first_predecessor, Predecessors&... predecessors) { + static_assert((conjunction<has_output_type<FirstPredecessor>, + has_output_type<Predecessors>...>::value), + "Not all node's predecessors has output_type typedef"); + static_assert((conjunction<is_sender<FirstPredecessor>, is_sender<Predecessors>...>::value), + "Not all node's predecessors are senders"); + return node_set<order::following, FirstPredecessor, Predecessors...>(first_predecessor, predecessors...); +} + +template<typename... 
Predecessors> +node_set<order::following, Predecessors...> +follows(node_set<order::undefined, Predecessors...>& predecessors_set) { + static_assert((conjunction<has_output_type<Predecessors>...>::value), + "Not all nodes in the set has output_type typedef"); + static_assert((conjunction<is_sender<Predecessors>...>::value), + "Not all nodes in the set are senders"); + return node_set<order::following, Predecessors...>(predecessors_set); +} + +template<typename FirstSuccessor, typename... Successors> +node_set<order::preceding, FirstSuccessor, Successors...> +precedes(FirstSuccessor& first_successor, Successors&... successors) { + static_assert((conjunction<has_input_type<FirstSuccessor>, + has_input_type<Successors>...>::value), + "Not all node's successors has input_type typedef"); + static_assert((conjunction<is_receiver<FirstSuccessor>, is_receiver<Successors>...>::value), + "Not all node's successors are receivers"); + return node_set<order::preceding, FirstSuccessor, Successors...>(first_successor, successors...); +} + +template<typename... Successors> +node_set<order::preceding, Successors...> +precedes(node_set<order::undefined, Successors...>& successors_set) { + static_assert((conjunction<has_input_type<Successors>...>::value), + "Not all nodes in the set has input_type typedef"); + static_assert((conjunction<is_receiver<Successors>...>::value), + "Not all nodes in the set are receivers"); + return node_set<order::preceding, Successors...>(successors_set); +} + +template <typename Node, typename... Nodes> +node_set<order::undefined, Node, Nodes...> +make_node_set(Node& first_node, Nodes&... nodes) { + return node_set<order::undefined, Node, Nodes...>(first_node, nodes...); +} + +template<size_t I> +class successor_selector { + template <typename NodeType> + static auto get_impl(NodeType& node, std::true_type) -> decltype(input_port<I>(node)) { + return input_port<I>(node); + } + + template <typename NodeType> + static NodeType& get_impl(NodeType& node, std::false_type) { return node; } + +public: + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get(NodeType& node) +#else + static auto get(NodeType& node) -> decltype(get_impl(node, has_input_ports_type<NodeType>())) +#endif + { + return get_impl(node, has_input_ports_type<NodeType>()); + } +}; + +template<size_t I> +class predecessor_selector { + template <typename NodeType> + static auto internal_get(NodeType& node, std::true_type) -> decltype(output_port<I>(node)) { + return output_port<I>(node); + } + + template <typename NodeType> + static NodeType& internal_get(NodeType& node, std::false_type) { return node;} + + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get_impl(NodeType& node, std::false_type) +#else + static auto get_impl(NodeType& node, std::false_type) -> decltype(internal_get(node, has_output_ports_type<NodeType>())) +#endif + { + return internal_get(node, has_output_ports_type<NodeType>()); + } + + template <typename AsyncNode> + static AsyncNode& get_impl(AsyncNode& node, std::true_type) { return node; } + +public: + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get(NodeType& node) +#else + static auto get(NodeType& node) -> decltype(get_impl(node, is_async_node<NodeType>())) +#endif + { + return get_impl(node, is_async_node<NodeType>()); + } +}; + +template<size_t I> +class make_edges_helper { +public: + template<typename PredecessorsTuple, typename NodeType> + static void 
connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { + make_edge(std::get<I>(predecessors), successor_selector<I>::get(node)); + make_edges_helper<I - 1>::connect_predecessors(predecessors, node); + } + + template<typename SuccessorsTuple, typename NodeType> + static void connect_successors(NodeType& node, SuccessorsTuple& successors) { + make_edge(predecessor_selector<I>::get(node), std::get<I>(successors)); + make_edges_helper<I - 1>::connect_successors(node, successors); + } +}; + +template<> +struct make_edges_helper<0> { + template<typename PredecessorsTuple, typename NodeType> + static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { + make_edge(std::get<0>(predecessors), successor_selector<0>::get(node)); + } + + template<typename SuccessorsTuple, typename NodeType> + static void connect_successors(NodeType& node, SuccessorsTuple& successors) { + make_edge(predecessor_selector<0>::get(node), std::get<0>(successors)); + } +}; + +// TODO: consider adding an overload for making edges between node sets +template<typename NodeType, typename OrderFlagType, typename... Args> +void make_edges(const node_set<OrderFlagType, Args...>& s, NodeType& node) { + const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; + make_edges_helper<SetSize - 1>::connect_predecessors(s.nodes, node); +} + +template <typename NodeType, typename OrderFlagType, typename... Args> +void make_edges(NodeType& node, const node_set<OrderFlagType, Args...>& s) { + const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; + make_edges_helper<SetSize - 1>::connect_successors(node, s.nodes); +} + +template <typename NodeType, typename... Nodes> +void make_edges_in_order(const node_set<order::following, Nodes...>& ns, NodeType& node) { + make_edges(ns, node); +} + +template <typename NodeType, typename... Nodes> +void make_edges_in_order(const node_set<order::preceding, Nodes...>& ns, NodeType& node) { + make_edges(node, ns); +} + +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +#endif // __TBB_flow_graph_node_set_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h index 8c20993795..7b325f0cf7 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h @@ -1,277 +1,277 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_flow_graph_nodes_deduction_H -#define __TBB_flow_graph_nodes_deduction_H - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Input, typename Output> -struct declare_body_types { - using input_type = Input; - using output_type = Output; -}; - -struct NoInputBody {}; - -template <typename Output> -struct declare_body_types<NoInputBody, Output> { - using output_type = Output; -}; - -template <typename T> struct body_types; - -template <typename T, typename Input, typename Output> -struct body_types<Output (T::*)(const Input&) const> : declare_body_types<Input, Output> {}; - -template <typename T, typename Input, typename Output> -struct body_types<Output (T::*)(const Input&)> : declare_body_types<Input, Output> {}; - -template <typename T, typename Input, typename Output> -struct body_types<Output (T::*)(Input&) const> : declare_body_types<Input, Output> {}; - -template <typename T, typename Input, typename Output> -struct body_types<Output (T::*)(Input&)> : declare_body_types<Input, Output> {}; - -template <typename T, typename Output> -struct body_types<Output (T::*)(flow_control&) const> : declare_body_types<NoInputBody, Output> {}; - -template <typename T, typename Output> -struct body_types<Output (T::*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; - -template <typename Input, typename Output> -struct body_types<Output (*)(Input&)> : declare_body_types<Input, Output> {}; - -template <typename Input, typename Output> -struct body_types<Output (*)(const Input&)> : declare_body_types<Input, Output> {}; - -template <typename Output> -struct body_types<Output (*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; - -template <typename Body> -using input_t = typename body_types<Body>::input_type; - -template <typename Body> -using output_t = typename body_types<Body>::output_type; - -template <typename T, typename Input, typename Output> -auto decide_on_operator_overload(Output (T::*name)(const Input&) const)->decltype(name); - -template <typename T, typename Input, typename Output> -auto decide_on_operator_overload(Output (T::*name)(const Input&))->decltype(name); - -template <typename T, typename Input, typename Output> -auto decide_on_operator_overload(Output (T::*name)(Input&) const)->decltype(name); - -template <typename T, typename Input, typename Output> -auto decide_on_operator_overload(Output (T::*name)(Input&))->decltype(name); - -template <typename Input, typename Output> -auto decide_on_operator_overload(Output (*name)(const Input&))->decltype(name); - -template <typename Input, typename Output> -auto decide_on_operator_overload(Output (*name)(Input&))->decltype(name); - -template <typename Body> -decltype(decide_on_operator_overload(&Body::operator())) decide_on_callable_type(int); - -template <typename Body> -decltype(decide_on_operator_overload(std::declval<Body>())) decide_on_callable_type(...); - -// Deduction guides for Flow Graph nodes - -template <typename GraphOrSet, typename Body> -input_node(GraphOrSet&&, Body) -->input_node<output_t<decltype(decide_on_callable_type<Body>(0))>>; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -template <typename NodeSet> -struct decide_on_set; - -template <typename Node, typename... Nodes> -struct decide_on_set<node_set<order::following, Node, Nodes...>> { - using type = typename Node::output_type; -}; - -template <typename Node, typename... 
Nodes> -struct decide_on_set<node_set<order::preceding, Node, Nodes...>> { - using type = typename Node::input_type; -}; - -template <typename NodeSet> -using decide_on_set_t = typename decide_on_set<std::decay_t<NodeSet>>::type; - -template <typename NodeSet> -broadcast_node(const NodeSet&) -->broadcast_node<decide_on_set_t<NodeSet>>; - -template <typename NodeSet> -buffer_node(const NodeSet&) -->buffer_node<decide_on_set_t<NodeSet>>; - -template <typename NodeSet> -queue_node(const NodeSet&) -->queue_node<decide_on_set_t<NodeSet>>; -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -template <typename GraphOrProxy, typename Sequencer> -sequencer_node(GraphOrProxy&&, Sequencer) -->sequencer_node<input_t<decltype(decide_on_callable_type<Sequencer>(0))>>; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -template <typename NodeSet, typename Compare> -priority_queue_node(const NodeSet&, const Compare&) -->priority_queue_node<decide_on_set_t<NodeSet>, Compare>; - -template <typename NodeSet> -priority_queue_node(const NodeSet&) -->priority_queue_node<decide_on_set_t<NodeSet>, std::less<decide_on_set_t<NodeSet>>>; -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -template <typename Key> -struct join_key { - using type = Key; -}; - -template <typename T> -struct join_key<const T&> { - using type = T&; -}; - -template <typename Key> -using join_key_t = typename join_key<Key>::type; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -template <typename Policy, typename... Predecessors> -join_node(const node_set<order::following, Predecessors...>&, Policy) -->join_node<std::tuple<typename Predecessors::output_type...>, - Policy>; - -template <typename Policy, typename Successor, typename... Successors> -join_node(const node_set<order::preceding, Successor, Successors...>&, Policy) -->join_node<typename Successor::input_type, Policy>; - -template <typename... Predecessors> -join_node(const node_set<order::following, Predecessors...>) -->join_node<std::tuple<typename Predecessors::output_type...>, - queueing>; - -template <typename Successor, typename... Successors> -join_node(const node_set<order::preceding, Successor, Successors...>) -->join_node<typename Successor::input_type, queueing>; -#endif - -template <typename GraphOrProxy, typename Body, typename... Bodies> -join_node(GraphOrProxy&&, Body, Bodies...) -->join_node<std::tuple<input_t<decltype(decide_on_callable_type<Body>(0))>, - input_t<decltype(decide_on_callable_type<Bodies>(0))>...>, - key_matching<join_key_t<output_t<decltype(decide_on_callable_type<Body>(0))>>>>; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -template <typename... Predecessors> -indexer_node(const node_set<order::following, Predecessors...>&) -->indexer_node<typename Predecessors::output_type...>; -#endif - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -template <typename NodeSet> -limiter_node(const NodeSet&, size_t) -->limiter_node<decide_on_set_t<NodeSet>>; - -template <typename Predecessor, typename... Predecessors> -split_node(const node_set<order::following, Predecessor, Predecessors...>&) -->split_node<typename Predecessor::output_type>; - -template <typename... 
Successors> -split_node(const node_set<order::preceding, Successors...>&) -->split_node<std::tuple<typename Successors::input_type...>>; - -#endif - -template <typename GraphOrSet, typename Body, typename Policy> -function_node(GraphOrSet&&, - size_t, Body, - Policy, node_priority_t = no_priority) -->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, - output_t<decltype(decide_on_callable_type<Body>(0))>, - Policy>; - -template <typename GraphOrSet, typename Body> -function_node(GraphOrSet&&, size_t, - Body, node_priority_t = no_priority) -->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, - output_t<decltype(decide_on_callable_type<Body>(0))>, - queueing>; - -template <typename Output> -struct continue_output { - using type = Output; -}; - -template <> -struct continue_output<void> { - using type = continue_msg; -}; - -template <typename T> -using continue_output_t = typename continue_output<T>::type; - -template <typename GraphOrSet, typename Body, typename Policy> -continue_node(GraphOrSet&&, Body, - Policy, node_priority_t = no_priority) -->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, - Policy>; - -template <typename GraphOrSet, typename Body, typename Policy> -continue_node(GraphOrSet&&, - int, Body, - Policy, node_priority_t = no_priority) -->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, - Policy>; - -template <typename GraphOrSet, typename Body> -continue_node(GraphOrSet&&, - Body, node_priority_t = no_priority) -->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, Policy<void>>; - -template <typename GraphOrSet, typename Body> -continue_node(GraphOrSet&&, int, - Body, node_priority_t = no_priority) -->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, - Policy<void>>; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - -template <typename NodeSet> -overwrite_node(const NodeSet&) -->overwrite_node<decide_on_set_t<NodeSet>>; - -template <typename NodeSet> -write_once_node(const NodeSet&) -->write_once_node<decide_on_set_t<NodeSet>>; -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -#endif // __TBB_flow_graph_nodes_deduction_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
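The removed half of _flow_graph_nodes_deduction.h above defines C++17 deduction guides that infer a node's template arguments from the callable handed to its constructor (via body_types and decide_on_callable_type). A hedged sketch of what that enables for a user, assuming a C++17 compiler so __TBB_CPP17_DEDUCTION_GUIDES_PRESENT is set:

// Sketch: class template argument deduction for a flow-graph node.
#include <oneapi/tbb/flow_graph.h>

int main() {
    tbb::flow::graph g;
    // body_types<> inspects the lambda's call operator: Input = int, Output = double,
    // so this declaration deduces tbb::flow::function_node<int, double> with the
    // default queueing policy per the guides above.
    tbb::flow::function_node squarer(g, tbb::flow::unlimited,
                                     [](const int& v) -> double { return double(v) * v; });
    squarer.try_put(3);
    g.wait_for_all();
    return 0;
}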
+*/ + +#ifndef __TBB_flow_graph_nodes_deduction_H +#define __TBB_flow_graph_nodes_deduction_H + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Input, typename Output> +struct declare_body_types { + using input_type = Input; + using output_type = Output; +}; + +struct NoInputBody {}; + +template <typename Output> +struct declare_body_types<NoInputBody, Output> { + using output_type = Output; +}; + +template <typename T> struct body_types; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(const Input&) const> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(const Input&)> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(Input&) const> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(Input&)> : declare_body_types<Input, Output> {}; + +template <typename T, typename Output> +struct body_types<Output (T::*)(flow_control&) const> : declare_body_types<NoInputBody, Output> {}; + +template <typename T, typename Output> +struct body_types<Output (T::*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; + +template <typename Input, typename Output> +struct body_types<Output (*)(Input&)> : declare_body_types<Input, Output> {}; + +template <typename Input, typename Output> +struct body_types<Output (*)(const Input&)> : declare_body_types<Input, Output> {}; + +template <typename Output> +struct body_types<Output (*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; + +template <typename Body> +using input_t = typename body_types<Body>::input_type; + +template <typename Body> +using output_t = typename body_types<Body>::output_type; + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(const Input&) const)->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(const Input&))->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(Input&) const)->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(Input&))->decltype(name); + +template <typename Input, typename Output> +auto decide_on_operator_overload(Output (*name)(const Input&))->decltype(name); + +template <typename Input, typename Output> +auto decide_on_operator_overload(Output (*name)(Input&))->decltype(name); + +template <typename Body> +decltype(decide_on_operator_overload(&Body::operator())) decide_on_callable_type(int); + +template <typename Body> +decltype(decide_on_operator_overload(std::declval<Body>())) decide_on_callable_type(...); + +// Deduction guides for Flow Graph nodes + +template <typename GraphOrSet, typename Body> +input_node(GraphOrSet&&, Body) +->input_node<output_t<decltype(decide_on_callable_type<Body>(0))>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename NodeSet> +struct decide_on_set; + +template <typename Node, typename... Nodes> +struct decide_on_set<node_set<order::following, Node, Nodes...>> { + using type = typename Node::output_type; +}; + +template <typename Node, typename... 
Nodes> +struct decide_on_set<node_set<order::preceding, Node, Nodes...>> { + using type = typename Node::input_type; +}; + +template <typename NodeSet> +using decide_on_set_t = typename decide_on_set<std::decay_t<NodeSet>>::type; + +template <typename NodeSet> +broadcast_node(const NodeSet&) +->broadcast_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +buffer_node(const NodeSet&) +->buffer_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +queue_node(const NodeSet&) +->queue_node<decide_on_set_t<NodeSet>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename GraphOrProxy, typename Sequencer> +sequencer_node(GraphOrProxy&&, Sequencer) +->sequencer_node<input_t<decltype(decide_on_callable_type<Sequencer>(0))>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename NodeSet, typename Compare> +priority_queue_node(const NodeSet&, const Compare&) +->priority_queue_node<decide_on_set_t<NodeSet>, Compare>; + +template <typename NodeSet> +priority_queue_node(const NodeSet&) +->priority_queue_node<decide_on_set_t<NodeSet>, std::less<decide_on_set_t<NodeSet>>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename Key> +struct join_key { + using type = Key; +}; + +template <typename T> +struct join_key<const T&> { + using type = T&; +}; + +template <typename Key> +using join_key_t = typename join_key<Key>::type; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename Policy, typename... Predecessors> +join_node(const node_set<order::following, Predecessors...>&, Policy) +->join_node<std::tuple<typename Predecessors::output_type...>, + Policy>; + +template <typename Policy, typename Successor, typename... Successors> +join_node(const node_set<order::preceding, Successor, Successors...>&, Policy) +->join_node<typename Successor::input_type, Policy>; + +template <typename... Predecessors> +join_node(const node_set<order::following, Predecessors...>) +->join_node<std::tuple<typename Predecessors::output_type...>, + queueing>; + +template <typename Successor, typename... Successors> +join_node(const node_set<order::preceding, Successor, Successors...>) +->join_node<typename Successor::input_type, queueing>; +#endif + +template <typename GraphOrProxy, typename Body, typename... Bodies> +join_node(GraphOrProxy&&, Body, Bodies...) +->join_node<std::tuple<input_t<decltype(decide_on_callable_type<Body>(0))>, + input_t<decltype(decide_on_callable_type<Bodies>(0))>...>, + key_matching<join_key_t<output_t<decltype(decide_on_callable_type<Body>(0))>>>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename... Predecessors> +indexer_node(const node_set<order::following, Predecessors...>&) +->indexer_node<typename Predecessors::output_type...>; +#endif + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename NodeSet> +limiter_node(const NodeSet&, size_t) +->limiter_node<decide_on_set_t<NodeSet>>; + +template <typename Predecessor, typename... Predecessors> +split_node(const node_set<order::following, Predecessor, Predecessors...>&) +->split_node<typename Predecessor::output_type>; + +template <typename... 
Successors> +split_node(const node_set<order::preceding, Successors...>&) +->split_node<std::tuple<typename Successors::input_type...>>; + +#endif + +template <typename GraphOrSet, typename Body, typename Policy> +function_node(GraphOrSet&&, + size_t, Body, + Policy, node_priority_t = no_priority) +->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, + output_t<decltype(decide_on_callable_type<Body>(0))>, + Policy>; + +template <typename GraphOrSet, typename Body> +function_node(GraphOrSet&&, size_t, + Body, node_priority_t = no_priority) +->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, + output_t<decltype(decide_on_callable_type<Body>(0))>, + queueing>; + +template <typename Output> +struct continue_output { + using type = Output; +}; + +template <> +struct continue_output<void> { + using type = continue_msg; +}; + +template <typename T> +using continue_output_t = typename continue_output<T>::type; + +template <typename GraphOrSet, typename Body, typename Policy> +continue_node(GraphOrSet&&, Body, + Policy, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy>; + +template <typename GraphOrSet, typename Body, typename Policy> +continue_node(GraphOrSet&&, + int, Body, + Policy, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy>; + +template <typename GraphOrSet, typename Body> +continue_node(GraphOrSet&&, + Body, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, Policy<void>>; + +template <typename GraphOrSet, typename Body> +continue_node(GraphOrSet&&, int, + Body, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy<void>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename NodeSet> +overwrite_node(const NodeSet&) +->overwrite_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +write_once_node(const NodeSet&) +->write_once_node<decide_on_set_t<NodeSet>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +#endif // __TBB_flow_graph_nodes_deduction_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h index 0c4580a199..f9bc3d3369 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h @@ -1,256 +1,256 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// a hash table buffer that can expand, and can support as many deletions as -// additions, list-based, with elements of list held in array (for destruction -// management), multiplicative hashing (like ets). No synchronization built-in. 
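The comment above introduces hash_buffer, an expandable chained hash table with a free list and no built-in synchronization. One assumption worth flagging: this buffer appears to be the storage behind key-matching join ports, so the hedged sketch below shows that user-facing feature through the regular public join_node API; the lambdas play the ValueToKey role described in the template parameters that follow.

// Sketch: pairing messages by key with a key_matching join_node (public API).
#include <oneapi/tbb/flow_graph.h>
#include <string>
#include <tuple>

struct Reading { int sensor_id; double value; };
struct Label   { int sensor_id; std::string name; };

int main() {
    using namespace tbb::flow;
    graph g;
    join_node<std::tuple<Reading, Label>, key_matching<int>> pair_up(
        g,
        [](const Reading& r) { return r.sensor_id; },   // key functor for port 0
        [](const Label& l)   { return l.sensor_id; });  // key functor for port 1
    function_node<std::tuple<Reading, Label>, continue_msg> sink(
        g, serial, [](const std::tuple<Reading, Label>&) { return continue_msg(); });
    make_edge(pair_up, sink);
    input_port<0>(pair_up).try_put(Reading{7, 3.14});
    input_port<1>(pair_up).try_put(Label{7, "temperature"});
    g.wait_for_all();
    return 0;
}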
-// - -#ifndef __TBB__flow_graph_hash_buffer_impl_H -#define __TBB__flow_graph_hash_buffer_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::flow::interfaceX::internal - -// elements in the table are a simple list; we need pointer to next element to -// traverse the chain -template<typename ValueType> -struct buffer_element_type { - // the second parameter below is void * because we can't forward-declare the type - // itself, so we just reinterpret_cast below. - typedef typename aligned_pair<ValueType, void *>::type type; -}; - -template - < - typename Key, // type of key within ValueType - typename ValueType, - typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType - typename HashCompare, // has hash and equal - typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair<ValueType, void *>::type > - > -class hash_buffer : public HashCompare { -public: - static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table - typedef ValueType value_type; - typedef typename buffer_element_type< value_type >::type element_type; - typedef value_type *pointer_type; - typedef element_type *list_array_type; // array we manage manually - typedef list_array_type *pointer_array_type; - typedef typename std::allocator_traits<Allocator>::template rebind_alloc<list_array_type> pointer_array_allocator_type; - typedef typename std::allocator_traits<Allocator>::template rebind_alloc<element_type> elements_array_allocator; - typedef typename std::decay<Key>::type Knoref; - -private: - ValueToKey *my_key; - size_t my_size; - size_t nelements; - pointer_array_type pointer_array; // pointer_array[my_size] - list_array_type elements_array; // elements_array[my_size / 2] - element_type* free_list; - - size_t mask() { return my_size - 1; } - - void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) { - for(size_t i=0; i < sz - 1; ++i ) { // construct free list - la[i].second = &(la[i+1]); - } - la[sz-1].second = NULL; - *p_free_list = (element_type *)&(la[0]); - } - - // cleanup for exceptions - struct DoCleanup { - pointer_array_type *my_pa; - list_array_type *my_elements; - size_t my_size; - - DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) : - my_pa(&pa), my_elements(&my_els), my_size(sz) { } - ~DoCleanup() { - if(my_pa) { - size_t dont_care = 0; - internal_free_buffer(*my_pa, *my_elements, my_size, dont_care); - } - } - }; - - // exception-safety requires we do all the potentially-throwing operations first - void grow_array() { - size_t new_size = my_size*2; - size_t new_nelements = nelements; // internal_free_buffer zeroes this - list_array_type new_elements_array = NULL; - pointer_array_type new_pointer_array = NULL; - list_array_type new_free_list = NULL; - { - DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size); - new_elements_array = elements_array_allocator().allocate(my_size); - new_pointer_array = pointer_array_allocator_type().allocate(new_size); - for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL; - set_up_free_list(&new_free_list, new_elements_array, my_size ); - - for(size_t i=0; i < my_size; ++i) { - for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) { - value_type *ov = reinterpret_cast<value_type *>(&(op->first)); - // could have std::move semantics - internal_insert_with_key(new_pointer_array, new_size, 
new_free_list, *ov); - } - } - my_cleanup.my_pa = NULL; - my_cleanup.my_elements = NULL; - } - - internal_free_buffer(pointer_array, elements_array, my_size, nelements); - free_list = new_free_list; - pointer_array = new_pointer_array; - elements_array = new_elements_array; - my_size = new_size; - nelements = new_nelements; - } - - // v should have perfect forwarding if std::move implemented. - // we use this method to move elements in grow_array, so can't use class fields - void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list, - const value_type &v) { - size_t l_mask = p_sz-1; - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - size_t h = this->hash((*my_key)(v)) & l_mask; - __TBB_ASSERT(p_free_list, "Error: free list not set up."); - element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second); - (void) new(&(my_elem->first)) value_type(v); - my_elem->second = p_pointer_array[h]; - p_pointer_array[h] = my_elem; - } - - void internal_initialize_buffer() { - pointer_array = pointer_array_allocator_type().allocate(my_size); - for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL; - elements_array = elements_array_allocator().allocate(my_size / 2); - set_up_free_list(&free_list, elements_array, my_size / 2); - } - - // made static so an enclosed class can use to properly dispose of the internals - static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) { - if(pa) { - for(size_t i = 0; i < sz; ++i ) { - element_type *p_next; - for( element_type *p = pa[i]; p; p = p_next) { - p_next = (element_type *)p->second; - // TODO revamp: make sure type casting is correct. - void* ptr = (void*)(p->first); -#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER - suppress_unused_warning(ptr); -#endif - ((value_type*)ptr)->~value_type(); - } - } - pointer_array_allocator_type().deallocate(pa, sz); - pa = NULL; - } - // Separate test (if allocation of pa throws, el may be allocated. - // but no elements will be constructed.) - if(el) { - elements_array_allocator().deallocate(el, sz / 2); - el = NULL; - } - sz = INITIAL_SIZE; - ne = 0; - } - -public: - hash_buffer() : my_key(NULL), my_size(INITIAL_SIZE), nelements(0) { - internal_initialize_buffer(); - } - - ~hash_buffer() { - internal_free_buffer(pointer_array, elements_array, my_size, nelements); - if(my_key) delete my_key; - } - hash_buffer(const hash_buffer&) = delete; - hash_buffer& operator=(const hash_buffer&) = delete; - - void reset() { - internal_free_buffer(pointer_array, elements_array, my_size, nelements); - internal_initialize_buffer(); - } - - // Take ownership of func object allocated with new. - // This method is only used internally, so can't be misused by user. - void set_key_func(ValueToKey *vtk) { my_key = vtk; } - // pointer is used to clone() - ValueToKey* get_key_func() { return my_key; } - - bool insert_with_key(const value_type &v) { - pointer_type p = NULL; - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(find_ref_with_key((*my_key)(v), p)) { - p->~value_type(); - (void) new(p) value_type(v); // copy-construct into the space - return false; - } - ++nelements; - if(nelements*2 > my_size) grow_array(); - internal_insert_with_key(pointer_array, my_size, free_list, v); - return true; - } - - // returns true and sets v to array element if found, else returns false. 
- bool find_ref_with_key(const Knoref& k, pointer_type &v) { - size_t i = this->hash(k) & mask(); - for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) { - pointer_type pv = reinterpret_cast<pointer_type>(&(p->first)); - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(this->equal((*my_key)(*pv), k)) { - v = pv; - return true; - } - } - return false; - } - - bool find_with_key( const Knoref& k, value_type &v) { - value_type *p; - if(find_ref_with_key(k, p)) { - v = *p; - return true; - } - else - return false; - } - - void delete_with_key(const Knoref& k) { - size_t h = this->hash(k) & mask(); - element_type* prev = NULL; - for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) { - value_type *vp = reinterpret_cast<value_type *>(&(p->first)); - __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(this->equal((*my_key)(*vp), k)) { - vp->~value_type(); - if(prev) prev->second = p->second; - else pointer_array[h] = (element_type *)(p->second); - p->second = free_list; - free_list = p; - --nelements; - return; - } - } - __TBB_ASSERT(false, "key not found for delete"); - } -}; -#endif // __TBB__flow_graph_hash_buffer_impl_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// a hash table buffer that can expand, and can support as many deletions as +// additions, list-based, with elements of list held in array (for destruction +// management), multiplicative hashing (like ets). No synchronization built-in. +// + +#ifndef __TBB__flow_graph_hash_buffer_impl_H +#define __TBB__flow_graph_hash_buffer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::flow::interfaceX::internal + +// elements in the table are a simple list; we need pointer to next element to +// traverse the chain +template<typename ValueType> +struct buffer_element_type { + // the second parameter below is void * because we can't forward-declare the type + // itself, so we just reinterpret_cast below. 
+ typedef typename aligned_pair<ValueType, void *>::type type; +}; + +template + < + typename Key, // type of key within ValueType + typename ValueType, + typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType + typename HashCompare, // has hash and equal + typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair<ValueType, void *>::type > + > +class hash_buffer : public HashCompare { +public: + static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table + typedef ValueType value_type; + typedef typename buffer_element_type< value_type >::type element_type; + typedef value_type *pointer_type; + typedef element_type *list_array_type; // array we manage manually + typedef list_array_type *pointer_array_type; + typedef typename std::allocator_traits<Allocator>::template rebind_alloc<list_array_type> pointer_array_allocator_type; + typedef typename std::allocator_traits<Allocator>::template rebind_alloc<element_type> elements_array_allocator; + typedef typename std::decay<Key>::type Knoref; + +private: + ValueToKey *my_key; + size_t my_size; + size_t nelements; + pointer_array_type pointer_array; // pointer_array[my_size] + list_array_type elements_array; // elements_array[my_size / 2] + element_type* free_list; + + size_t mask() { return my_size - 1; } + + void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) { + for(size_t i=0; i < sz - 1; ++i ) { // construct free list + la[i].second = &(la[i+1]); + } + la[sz-1].second = NULL; + *p_free_list = (element_type *)&(la[0]); + } + + // cleanup for exceptions + struct DoCleanup { + pointer_array_type *my_pa; + list_array_type *my_elements; + size_t my_size; + + DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) : + my_pa(&pa), my_elements(&my_els), my_size(sz) { } + ~DoCleanup() { + if(my_pa) { + size_t dont_care = 0; + internal_free_buffer(*my_pa, *my_elements, my_size, dont_care); + } + } + }; + + // exception-safety requires we do all the potentially-throwing operations first + void grow_array() { + size_t new_size = my_size*2; + size_t new_nelements = nelements; // internal_free_buffer zeroes this + list_array_type new_elements_array = NULL; + pointer_array_type new_pointer_array = NULL; + list_array_type new_free_list = NULL; + { + DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size); + new_elements_array = elements_array_allocator().allocate(my_size); + new_pointer_array = pointer_array_allocator_type().allocate(new_size); + for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL; + set_up_free_list(&new_free_list, new_elements_array, my_size ); + + for(size_t i=0; i < my_size; ++i) { + for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) { + value_type *ov = reinterpret_cast<value_type *>(&(op->first)); + // could have std::move semantics + internal_insert_with_key(new_pointer_array, new_size, new_free_list, *ov); + } + } + my_cleanup.my_pa = NULL; + my_cleanup.my_elements = NULL; + } + + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + free_list = new_free_list; + pointer_array = new_pointer_array; + elements_array = new_elements_array; + my_size = new_size; + nelements = new_nelements; + } + + // v should have perfect forwarding if std::move implemented. 
+ // we use this method to move elements in grow_array, so can't use class fields + void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list, + const value_type &v) { + size_t l_mask = p_sz-1; + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + size_t h = this->hash((*my_key)(v)) & l_mask; + __TBB_ASSERT(p_free_list, "Error: free list not set up."); + element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second); + (void) new(&(my_elem->first)) value_type(v); + my_elem->second = p_pointer_array[h]; + p_pointer_array[h] = my_elem; + } + + void internal_initialize_buffer() { + pointer_array = pointer_array_allocator_type().allocate(my_size); + for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL; + elements_array = elements_array_allocator().allocate(my_size / 2); + set_up_free_list(&free_list, elements_array, my_size / 2); + } + + // made static so an enclosed class can use to properly dispose of the internals + static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) { + if(pa) { + for(size_t i = 0; i < sz; ++i ) { + element_type *p_next; + for( element_type *p = pa[i]; p; p = p_next) { + p_next = (element_type *)p->second; + // TODO revamp: make sure type casting is correct. + void* ptr = (void*)(p->first); +#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER + suppress_unused_warning(ptr); +#endif + ((value_type*)ptr)->~value_type(); + } + } + pointer_array_allocator_type().deallocate(pa, sz); + pa = NULL; + } + // Separate test (if allocation of pa throws, el may be allocated. + // but no elements will be constructed.) + if(el) { + elements_array_allocator().deallocate(el, sz / 2); + el = NULL; + } + sz = INITIAL_SIZE; + ne = 0; + } + +public: + hash_buffer() : my_key(NULL), my_size(INITIAL_SIZE), nelements(0) { + internal_initialize_buffer(); + } + + ~hash_buffer() { + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + if(my_key) delete my_key; + } + hash_buffer(const hash_buffer&) = delete; + hash_buffer& operator=(const hash_buffer&) = delete; + + void reset() { + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + internal_initialize_buffer(); + } + + // Take ownership of func object allocated with new. + // This method is only used internally, so can't be misused by user. + void set_key_func(ValueToKey *vtk) { my_key = vtk; } + // pointer is used to clone() + ValueToKey* get_key_func() { return my_key; } + + bool insert_with_key(const value_type &v) { + pointer_type p = NULL; + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(find_ref_with_key((*my_key)(v), p)) { + p->~value_type(); + (void) new(p) value_type(v); // copy-construct into the space + return false; + } + ++nelements; + if(nelements*2 > my_size) grow_array(); + internal_insert_with_key(pointer_array, my_size, free_list, v); + return true; + } + + // returns true and sets v to array element if found, else returns false. 
+ bool find_ref_with_key(const Knoref& k, pointer_type &v) { + size_t i = this->hash(k) & mask(); + for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) { + pointer_type pv = reinterpret_cast<pointer_type>(&(p->first)); + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(this->equal((*my_key)(*pv), k)) { + v = pv; + return true; + } + } + return false; + } + + bool find_with_key( const Knoref& k, value_type &v) { + value_type *p; + if(find_ref_with_key(k, p)) { + v = *p; + return true; + } + else + return false; + } + + void delete_with_key(const Knoref& k) { + size_t h = this->hash(k) & mask(); + element_type* prev = NULL; + for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) { + value_type *vp = reinterpret_cast<value_type *>(&(p->first)); + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(this->equal((*my_key)(*vp), k)) { + vp->~value_type(); + if(prev) prev->second = p->second; + else pointer_array[h] = (element_type *)(p->second); + p->second = free_list; + free_list = p; + --nelements; + return; + } + } + __TBB_ASSERT(false, "key not found for delete"); + } +}; +#endif // __TBB__flow_graph_hash_buffer_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h index d8256ca8a2..be8ad53a04 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h @@ -1,364 +1,364 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef _FGT_GRAPH_TRACE_IMPL_H -#define _FGT_GRAPH_TRACE_IMPL_H - -#include "../profiling.h" -#if (_MSC_VER >= 1900) - #include <intrin.h> -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template< typename T > class sender; -template< typename T > class receiver; - -#if TBB_USE_PROFILING_TOOLS - #if __TBB_FLOW_TRACE_CODEPTR - #if (_MSC_VER >= 1900) - #define CODEPTR() (_ReturnAddress()) - #elif __TBB_GCC_VERSION >= 40800 - #define CODEPTR() ( __builtin_return_address(0)) - #else - #define CODEPTR() NULL - #endif - #else - #define CODEPTR() NULL - #endif /* __TBB_FLOW_TRACE_CODEPTR */ - -static inline void fgt_alias_port(void *node, void *p, bool visible) { - if(visible) - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE ); - else - itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE ); -} - -static inline void fgt_composite ( void* codeptr, void *node, void *graph ) { - itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE ); - suppress_unused_warning( codeptr ); -#if __TBB_FLOW_TRACE_CODEPTR - if (codeptr != NULL) { - register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); - } -#endif -} - -static inline void fgt_internal_alias_input_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT ); -} - -static inline void fgt_internal_alias_output_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT ); -} - -template<typename InputType> -void alias_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { - // TODO: Make fgt_internal_alias_input_port a function template? - fgt_internal_alias_input_port( node, port, name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_input_alias_helper { - static void alias_port( void *node, PortsTuple &ports ) { - alias_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); - fgt_internal_input_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_input_alias_helper<PortsTuple, 0> { - static void alias_port( void * /* node */, PortsTuple & /* ports */ ) { } -}; - -template<typename OutputType> -void alias_output_port(void *node, sender<OutputType>* port, string_resource_index name_index) { - // TODO: Make fgt_internal_alias_output_port a function template? 
- fgt_internal_alias_output_port( node, static_cast<void *>(port), name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_output_alias_helper { - static void alias_port( void *node, PortsTuple &ports ) { - alias_output_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); - fgt_internal_output_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_output_alias_helper<PortsTuple, 0> { - static void alias_port( void * /*node*/, PortsTuple &/*ports*/ ) { - } -}; - -static inline void fgt_internal_create_input_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); -} - -static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_resource_index name_index ) { - itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index); - suppress_unused_warning( codeptr ); -#if __TBB_FLOW_TRACE_CODEPTR - if (codeptr != NULL) { - register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); - } -#endif -} - -template<typename InputType> -void register_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { - // TODO: Make fgt_internal_create_input_port a function template? - fgt_internal_create_input_port(node, static_cast<void*>(port), name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_input_helper { - static void register_port( void *node, PortsTuple &ports ) { - register_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); - fgt_internal_input_helper<PortsTuple, N-1>::register_port( node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_input_helper<PortsTuple, 1> { - static void register_port( void *node, PortsTuple &ports ) { - register_input_port( node, &(std::get<0>(ports)), FLOW_INPUT_PORT_0 ); - } -}; - -template<typename OutputType> -void register_output_port(void* codeptr, void *node, sender<OutputType>* port, string_resource_index name_index) { - // TODO: Make fgt_internal_create_output_port a function template? 
- fgt_internal_create_output_port( codeptr, node, static_cast<void *>(port), name_index); -} - -template < typename PortsTuple, int N > -struct fgt_internal_output_helper { - static void register_port( void* codeptr, void *node, PortsTuple &ports ) { - register_output_port( codeptr, node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); - fgt_internal_output_helper<PortsTuple, N-1>::register_port( codeptr, node, ports ); - } -}; - -template < typename PortsTuple > -struct fgt_internal_output_helper<PortsTuple,1> { - static void register_port( void* codeptr, void *node, PortsTuple &ports ) { - register_output_port( codeptr, node, &(std::get<0>(ports)), FLOW_OUTPUT_PORT_0 ); - } -}; - -template< typename NodeType > -void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) { - void *addr = (void *)( static_cast< receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) ); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -template< typename NodeType > -void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) { - void *addr = const_cast<NodeType *>(node); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -template< typename NodeType > -static inline void fgt_node_desc( const NodeType *node, const char *desc ) { - void *addr = (void *)( static_cast< sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) ); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); -} - -static inline void fgt_graph_desc( const void *g, const char *desc ) { - void *addr = const_cast< void *>(g); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); -} - -static inline void fgt_body( void *node, void *body ) { - itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node(void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports ) { - itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); - fgt_internal_output_helper<PortsTuple, N>::register_port(codeptr, input_port, ports ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports, void *body ) { - itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); - fgt_internal_output_helper<PortsTuple, N>::register_port( codeptr, input_port, ports ); - fgt_body( input_port, body ); -} - -template< int N, typename PortsTuple > -static inline void fgt_multiinput_node( void* codeptr, string_resource_index t, void *g, PortsTuple &ports, void *output_port) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); - fgt_internal_input_helper<PortsTuple, N>::register_port( output_port, ports ); -} - -static inline void fgt_multiinput_multioutput_node( void* codeptr, string_resource_index t, void *n, void *g ) { - itt_make_task_group( ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t ); - suppress_unused_warning( codeptr ); -#if 
__TBB_FLOW_TRACE_CODEPTR - if (codeptr != NULL) { - register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr); - } -#endif -} - -static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *output_port ) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); -} - -static void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *output_port, void *body ) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); - fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); - fgt_body( output_port, body ); -} - -static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port ) { - fgt_node( codeptr, t, g, output_port ); - fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); -} - -static inline void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port, void *body ) { - fgt_node_with_body( codeptr, t, g, output_port, body ); - fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); -} - - -static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *decrement_port, void *output_port ) { - fgt_node( codeptr, t, g, input_port, output_port ); - fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 ); -} - -static inline void fgt_make_edge( void *output_port, void *input_port ) { - itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); -} - -static inline void fgt_remove_edge( void *output_port, void *input_port ) { - itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); -} - -static inline void fgt_graph( void *g ) { - itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH ); -} - -static inline void fgt_begin_body( void *body ) { - itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_BODY ); -} - -static inline void fgt_end_body( void * ) { - itt_task_end( ITT_DOMAIN_FLOW ); -} - -static inline void fgt_async_try_put_begin( void *node, void *port ) { - itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); -} - -static inline void fgt_async_try_put_end( void *, void * ) { - itt_task_end( ITT_DOMAIN_FLOW ); -} - -static inline void fgt_async_reserve( void *node, void *graph ) { - itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); -} - -static inline void fgt_async_commit( void *node, void * /*graph*/) { - itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE ); -} - -static inline void fgt_reserve_wait( void *graph ) { - itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_NULL ); -} - -static inline void fgt_release_wait( void *graph ) { - itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH ); -} - -#else // TBB_USE_PROFILING_TOOLS - -#define CODEPTR() NULL - -static inline void fgt_alias_port(void * /*node*/, void * /*p*/, bool /*visible*/ ) { } - -static inline void fgt_composite ( void* /*codeptr*/, void * /*node*/, void * /*graph*/ ) { } - -static inline void fgt_graph( void * /*g*/ ) { } - -template< typename NodeType > -static inline void fgt_multioutput_node_desc( 
const NodeType * /*node*/, const char * /*desc*/ ) { } - -template< typename NodeType > -static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } - -static inline void fgt_graph_desc( const void * /*g*/, const char * /*desc*/ ) { } - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { } - -template< int N, typename PortsTuple > -static inline void fgt_multioutput_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { } - -template< int N, typename PortsTuple > -static inline void fgt_multiinput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { } - -static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*node*/, void * /*graph*/ ) { } - -static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { } -static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { } - -static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { } -static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { } - -static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { } -static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { } - -static inline void fgt_begin_body( void * /*body*/ ) { } -static inline void fgt_end_body( void * /*body*/) { } - -static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { } -static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { } -static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { } -static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { } -static inline void fgt_reserve_wait( void * /*graph*/ ) { } -static inline void fgt_release_wait( void * /*graph*/ ) { } - -template< typename NodeType > -void fgt_multiinput_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } - -template < typename PortsTuple, int N > -struct fgt_internal_input_alias_helper { - static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } -}; - -template < typename PortsTuple, int N > -struct fgt_internal_output_alias_helper { - static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } -}; - -#endif // TBB_USE_PROFILING_TOOLS - -} // d1 -} // namespace detail -} // namespace tbb - -#endif // _FGT_GRAPH_TRACE_IMPL_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _FGT_GRAPH_TRACE_IMPL_H +#define _FGT_GRAPH_TRACE_IMPL_H + +#include "../profiling.h" +#if (_MSC_VER >= 1900) + #include <intrin.h> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template< typename T > class sender; +template< typename T > class receiver; + +#if TBB_USE_PROFILING_TOOLS + #if __TBB_FLOW_TRACE_CODEPTR + #if (_MSC_VER >= 1900) + #define CODEPTR() (_ReturnAddress()) + #elif __TBB_GCC_VERSION >= 40800 + #define CODEPTR() ( __builtin_return_address(0)) + #else + #define CODEPTR() NULL + #endif + #else + #define CODEPTR() NULL + #endif /* __TBB_FLOW_TRACE_CODEPTR */ + +static inline void fgt_alias_port(void *node, void *p, bool visible) { + if(visible) + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE ); + else + itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE ); +} + +static inline void fgt_composite ( void* codeptr, void *node, void *graph ) { + itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE ); + suppress_unused_warning( codeptr ); +#if __TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +static inline void fgt_internal_alias_input_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT ); +} + +static inline void fgt_internal_alias_output_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT ); +} + +template<typename InputType> +void alias_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_alias_input_port a function template? + fgt_internal_alias_input_port( node, port, name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_input_alias_helper { + static void alias_port( void *node, PortsTuple &ports ) { + alias_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); + fgt_internal_input_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_input_alias_helper<PortsTuple, 0> { + static void alias_port( void * /* node */, PortsTuple & /* ports */ ) { } +}; + +template<typename OutputType> +void alias_output_port(void *node, sender<OutputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_alias_output_port a function template? 
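An aside on the CODEPTR() macro defined near the top of this header: the pattern is to capture the return address of the current function through a compiler intrinsic when one is available, and otherwise degrade to a null pointer so callers need no conditional code. Below is a minimal standalone sketch of that idea, not TBB code; TRACE_CODEPTR and traced_node are invented names.

// Illustrative sketch (not TBB code): capture the call-site address the way
// CODEPTR() above does, falling back to a null pointer when no intrinsic exists.
#include <cstdio>

#if defined(_MSC_VER) && _MSC_VER >= 1900
    #include <intrin.h>
    #define TRACE_CODEPTR() (_ReturnAddress())
#elif defined(__GNUC__)
    #define TRACE_CODEPTR() (__builtin_return_address(0))
#else
    #define TRACE_CODEPTR() nullptr
#endif

struct traced_node {
    void* codeptr;
    // codeptr holds the address this constructor returns to, i.e. a location in
    // the code that constructed the node (assuming the constructor is not inlined).
    traced_node() : codeptr(TRACE_CODEPTR()) {}
};

int main() {
    traced_node n;                                    // captured address lies inside main()
    std::printf("constructed from %p\n", n.codeptr);  // a profiler would attach this to the node
}

In the header above the same capture is additionally gated on __TBB_FLOW_TRACE_CODEPTR, so it can be compiled out entirely.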
+ fgt_internal_alias_output_port( node, static_cast<void *>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_output_alias_helper { + static void alias_port( void *node, PortsTuple &ports ) { + alias_output_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); + fgt_internal_output_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_output_alias_helper<PortsTuple, 0> { + static void alias_port( void * /*node*/, PortsTuple &/*ports*/ ) { + } +}; + +static inline void fgt_internal_create_input_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); +} + +static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_resource_index name_index ) { + itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index); + suppress_unused_warning( codeptr ); +#if __TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +template<typename InputType> +void register_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_create_input_port a function template? + fgt_internal_create_input_port(node, static_cast<void*>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_input_helper { + static void register_port( void *node, PortsTuple &ports ) { + register_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); + fgt_internal_input_helper<PortsTuple, N-1>::register_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_input_helper<PortsTuple, 1> { + static void register_port( void *node, PortsTuple &ports ) { + register_input_port( node, &(std::get<0>(ports)), FLOW_INPUT_PORT_0 ); + } +}; + +template<typename OutputType> +void register_output_port(void* codeptr, void *node, sender<OutputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_create_output_port a function template? 
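The fgt_internal_input_helper / fgt_internal_output_helper structs above walk a tuple of ports with template recursion: the primary template handles element N-1 and recurses, and a specialization terminates the recursion. The following standalone sketch shows the same pattern; visit_ports_helper and port are invented names, not TBB code.

// Illustrative sketch (not TBB code) of the recursive tuple-walking helper.
#include <cstdio>
#include <tuple>

struct port { const char* label; };

template <typename PortsTuple, int N>
struct visit_ports_helper {
    static void apply(PortsTuple& ports) {
        // Handle port N-1, then recurse towards the front of the tuple.
        std::printf("registering port %d (%s)\n", N - 1, std::get<N - 1>(ports).label);
        visit_ports_helper<PortsTuple, N - 1>::apply(ports);
    }
};

// Base case stops the recursion, mirroring the <PortsTuple, 1> specialization
// of fgt_internal_input_helper above.
template <typename PortsTuple>
struct visit_ports_helper<PortsTuple, 1> {
    static void apply(PortsTuple& ports) {
        std::printf("registering port 0 (%s)\n", std::get<0>(ports).label);
    }
};

int main() {
    auto ports = std::make_tuple(port{"in"}, port{"mid"}, port{"out"});
    visit_ports_helper<decltype(ports), 3>::apply(ports);  // visits ports 2, 1, 0
}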
+ fgt_internal_create_output_port( codeptr, node, static_cast<void *>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_output_helper { + static void register_port( void* codeptr, void *node, PortsTuple &ports ) { + register_output_port( codeptr, node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); + fgt_internal_output_helper<PortsTuple, N-1>::register_port( codeptr, node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_output_helper<PortsTuple,1> { + static void register_port( void* codeptr, void *node, PortsTuple &ports ) { + register_output_port( codeptr, node, &(std::get<0>(ports)), FLOW_OUTPUT_PORT_0 ); + } +}; + +template< typename NodeType > +void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) { + void *addr = (void *)( static_cast< receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) ); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +template< typename NodeType > +void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) { + void *addr = const_cast<NodeType *>(node); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +template< typename NodeType > +static inline void fgt_node_desc( const NodeType *node, const char *desc ) { + void *addr = (void *)( static_cast< sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) ); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +static inline void fgt_graph_desc( const void *g, const char *desc ) { + void *addr = const_cast< void *>(g); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); +} + +static inline void fgt_body( void *node, void *body ) { + itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node(void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports ) { + itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); + fgt_internal_output_helper<PortsTuple, N>::register_port(codeptr, input_port, ports ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports, void *body ) { + itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); + fgt_internal_output_helper<PortsTuple, N>::register_port( codeptr, input_port, ports ); + fgt_body( input_port, body ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multiinput_node( void* codeptr, string_resource_index t, void *g, PortsTuple &ports, void *output_port) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); + fgt_internal_input_helper<PortsTuple, N>::register_port( output_port, ports ); +} + +static inline void fgt_multiinput_multioutput_node( void* codeptr, string_resource_index t, void *n, void *g ) { + itt_make_task_group( ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t ); + suppress_unused_warning( codeptr ); +#if 
__TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *output_port ) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); +} + +static void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *output_port, void *body ) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); + fgt_body( output_port, body ); +} + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port ) { + fgt_node( codeptr, t, g, output_port ); + fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); +} + +static inline void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port, void *body ) { + fgt_node_with_body( codeptr, t, g, output_port, body ); + fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); +} + + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *decrement_port, void *output_port ) { + fgt_node( codeptr, t, g, input_port, output_port ); + fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 ); +} + +static inline void fgt_make_edge( void *output_port, void *input_port ) { + itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); +} + +static inline void fgt_remove_edge( void *output_port, void *input_port ) { + itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); +} + +static inline void fgt_graph( void *g ) { + itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH ); +} + +static inline void fgt_begin_body( void *body ) { + itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_BODY ); +} + +static inline void fgt_end_body( void * ) { + itt_task_end( ITT_DOMAIN_FLOW ); +} + +static inline void fgt_async_try_put_begin( void *node, void *port ) { + itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); +} + +static inline void fgt_async_try_put_end( void *, void * ) { + itt_task_end( ITT_DOMAIN_FLOW ); +} + +static inline void fgt_async_reserve( void *node, void *graph ) { + itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); +} + +static inline void fgt_async_commit( void *node, void * /*graph*/) { + itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE ); +} + +static inline void fgt_reserve_wait( void *graph ) { + itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_NULL ); +} + +static inline void fgt_release_wait( void *graph ) { + itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH ); +} + +#else // TBB_USE_PROFILING_TOOLS + +#define CODEPTR() NULL + +static inline void fgt_alias_port(void * /*node*/, void * /*p*/, bool /*visible*/ ) { } + +static inline void fgt_composite ( void* /*codeptr*/, void * /*node*/, void * /*graph*/ ) { } + +static inline void fgt_graph( void * /*g*/ ) { } + +template< typename NodeType > +static inline void fgt_multioutput_node_desc( 
const NodeType * /*node*/, const char * /*desc*/ ) { } + +template< typename NodeType > +static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } + +static inline void fgt_graph_desc( const void * /*g*/, const char * /*desc*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multiinput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { } + +static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*node*/, void * /*graph*/ ) { } + +static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { } +static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { } + +static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { } +static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { } + +static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { } +static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { } + +static inline void fgt_begin_body( void * /*body*/ ) { } +static inline void fgt_end_body( void * /*body*/) { } + +static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { } +static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { } +static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { } +static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { } +static inline void fgt_reserve_wait( void * /*graph*/ ) { } +static inline void fgt_release_wait( void * /*graph*/ ) { } + +template< typename NodeType > +void fgt_multiinput_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } + +template < typename PortsTuple, int N > +struct fgt_internal_input_alias_helper { + static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } +}; + +template < typename PortsTuple, int N > +struct fgt_internal_output_alias_helper { + static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } +}; + +#endif // TBB_USE_PROFILING_TOOLS + +} // d1 +} // namespace detail +} // namespace tbb + +#endif // _FGT_GRAPH_TRACE_IMPL_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h index 97c770b154..e00dd14210 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h @@ -1,407 +1,407 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__flow_graph_types_impl_H -#define __TBB__flow_graph_types_impl_H - -#ifndef __TBB_flow_graph_H -#error Do not #include this internal file directly; use public TBB headers instead. -#endif - -// included in namespace tbb::detail::d1 - -// the change to key_matching (adding a K and KHash template parameter, making it a class) -// means we have to pass this data to the key_matching_port. All the ports have only one -// template parameter, so we have to wrap the following types in a trait: -// -// . K == key_type -// . KHash == hash and compare for Key -// . TtoK == function_body that given an object of T, returns its K -// . T == type accepted by port, and stored in the hash table -// -// The port will have an additional parameter on node construction, which is a function_body -// that accepts a const T& and returns a K which is the field in T which is its K. -template<typename Kp, typename KHashp, typename Tp> -struct KeyTrait { - typedef Kp K; - typedef Tp T; - typedef type_to_key_function_body<T,K> TtoK; - typedef KHashp KHash; -}; - -// wrap each element of a tuple in a template, and make a tuple of the result. -template<int N, template<class> class PT, typename TypeTuple> -struct wrap_tuple_elements; - -// A wrapper that generates the traits needed for each port of a key-matching join, -// and the type of the tuple of input ports. -template<int N, template<class> class PT, typename KeyTraits, typename TypeTuple> -struct wrap_key_tuple_elements; - -template<int N, template<class> class PT, typename... Args> -struct wrap_tuple_elements<N, PT, std::tuple<Args...> >{ - typedef typename std::tuple<PT<Args>... > type; -}; - -template<int N, template<class> class PT, typename KeyTraits, typename... Args> -struct wrap_key_tuple_elements<N, PT, KeyTraits, std::tuple<Args...> > { - typedef typename KeyTraits::key_type K; - typedef typename KeyTraits::hash_compare_type KHash; - typedef typename std::tuple<PT<KeyTrait<K, KHash, Args> >... > type; -}; - -template< int... S > class sequence {}; - -template< int N, int... S > -struct make_sequence : make_sequence < N - 1, N - 1, S... > {}; - -template< int... S > -struct make_sequence < 0, S... > { - typedef sequence<S...> type; -}; - -//! type mimicking std::pair but with trailing fill to ensure each element of an array -//* will have the correct alignment -template<typename T1, typename T2, size_t REM> -struct type_plus_align { - char first[sizeof(T1)]; - T2 second; - char fill1[REM]; -}; - -template<typename T1, typename T2> -struct type_plus_align<T1,T2,0> { - char first[sizeof(T1)]; - T2 second; -}; - -template<class U> struct alignment_of { - typedef struct { char t; U padded; } test_alignment; - static const size_t value = sizeof(test_alignment) - sizeof(U); -}; - -// T1, T2 are actual types stored. The space defined for T1 in the type returned -// is a char array of the correct size. Type T2 should be trivially-constructible, -// T1 must be explicitly managed. 
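A quick aside on the alignment_of helper just defined and used by aligned_pair below: placing a char in front of U forces the compiler to pad up to U's alignment, so sizeof(test_alignment) - sizeof(U) recovers that alignment. The compile-time check below is illustrative only (not TBB code); sample is an invented type, and the comparison against the built-in alignof is the part added here.

// Standalone check (not TBB code) of the alignment_of padding trick.
#include <cstddef>

template <class U>
struct alignment_of {
    typedef struct { char t; U padded; } test_alignment;
    static const std::size_t value = sizeof(test_alignment) - sizeof(U);
};

struct sample { int i; char c; };   // an arbitrary aggregate for the demo

static_assert(alignment_of<int>::value    == alignof(int),    "agrees with alignof for int");
static_assert(alignment_of<sample>::value == alignof(sample), "agrees with alignof for sample");

int main() {}   // nothing to run; the checks happen at compile time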
-template<typename T1, typename T2> -struct aligned_pair { - static const size_t t1_align = alignment_of<T1>::value; - static const size_t t2_align = alignment_of<T2>::value; - typedef type_plus_align<T1, T2, 0 > just_pair; - static const size_t max_align = t1_align < t2_align ? t2_align : t1_align; - static const size_t extra_bytes = sizeof(just_pair) % max_align; - static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0; -public: - typedef type_plus_align<T1,T2,remainder> type; -}; // aligned_pair - -// support for variant type -// type we use when we're not storing a value -struct default_constructed { }; - -// type which contains another type, tests for what type is contained, and references to it. -// Wrapper<T> -// void CopyTo( void *newSpace) : builds a Wrapper<T> copy of itself in newSpace - -// struct to allow us to copy and test the type of objects -struct WrapperBase { - virtual ~WrapperBase() {} - virtual void CopyTo(void* /*newSpace*/) const = 0; -}; - -// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be -// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be -// examined via value(), but not modified. -template<typename T> -struct Wrapper: public WrapperBase { - typedef T value_type; - typedef T* pointer_type; -private: - T value_space; -public: - const value_type &value() const { return value_space; } - -private: - Wrapper(); - - // on exception will ensure the Wrapper will contain only a trivially-constructed object - struct _unwind_space { - pointer_type space; - _unwind_space(pointer_type p) : space(p) {} - ~_unwind_space() { - if(space) (void) new (space) Wrapper<default_constructed>(default_constructed()); - } - }; -public: - explicit Wrapper( const T& other ) : value_space(other) { } - explicit Wrapper(const Wrapper& other) = delete; - - void CopyTo(void* newSpace) const override { - _unwind_space guard((pointer_type)newSpace); - (void) new(newSpace) Wrapper(value_space); - guard.space = NULL; - } - ~Wrapper() { } -}; - -// specialization for array objects -template<typename T, size_t N> -struct Wrapper<T[N]> : public WrapperBase { - typedef T value_type; - typedef T* pointer_type; - // space must be untyped. - typedef T ArrayType[N]; -private: - // The space is not of type T[N] because when copy-constructing, it would be - // default-initialized and then copied to in some fashion, resulting in two - // constructions and one destruction per element. If the type is char[ ], we - // placement new into each element, resulting in one construction per element. - static const size_t space_size = sizeof(ArrayType) / sizeof(char); - char value_space[space_size]; - - - // on exception will ensure the already-built objects will be destructed - // (the value_space is a char array, so it is already trivially-destructible.) 
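As an aside, the _unwind_class guard that follows implements the usual exception-safety idiom for element-by-element placement new: count how many elements were constructed and, on unwinding, destroy exactly those in reverse order. A self-contained sketch of the idiom (not TBB code; may_throw is an invented type):

// Illustrative sketch (not TBB code): roll back partially built array storage.
#include <cstddef>
#include <cstdio>
#include <new>
#include <stdexcept>

struct may_throw {
    int v;
    explicit may_throw(int x) : v(x) {
        if (x == 3) throw std::runtime_error("construction failed");  // simulate a failure
    }
    ~may_throw() { std::printf("destroying %d\n", v); }
};

int main() {
    alignas(may_throw) unsigned char raw[4 * sizeof(may_throw)];  // untyped storage
    may_throw* base = reinterpret_cast<may_throw*>(raw);
    std::size_t built = 0;                    // plays the role of already_built
    try {
        for (std::size_t i = 0; i < 4; ++i) {
            ::new (static_cast<void*>(base + i)) may_throw(static_cast<int>(i));
            ++built;                          // counted only after a successful construction
        }
    } catch (...) {
        // Unwind in reverse order, touching only the elements that were built.
        for (std::size_t i = built; i > 0; --i) base[i - 1].~may_throw();
        std::printf("rolled back %zu elements\n", built);
        return 0;
    }
    for (std::size_t i = 4; i > 0; --i) base[i - 1].~may_throw();  // normal cleanup
}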
- struct _unwind_class { - pointer_type space; - int already_built; - _unwind_class(pointer_type p) : space(p), already_built(0) {} - ~_unwind_class() { - if(space) { - for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type(); - (void) new(space) Wrapper<default_constructed>(default_constructed()); - } - } - }; -public: - const ArrayType &value() const { - char *vp = const_cast<char *>(value_space); - return reinterpret_cast<ArrayType &>(*vp); - } - -private: - Wrapper(); -public: - // have to explicitly construct because other decays to a const value_type* - explicit Wrapper(const ArrayType& other) { - _unwind_class guard((pointer_type)value_space); - pointer_type vp = reinterpret_cast<pointer_type>(&value_space); - for(size_t i = 0; i < N; ++i ) { - (void) new(vp++) value_type(other[i]); - ++(guard.already_built); - } - guard.space = NULL; - } - explicit Wrapper(const Wrapper& other) : WrapperBase() { - // we have to do the heavy lifting to copy contents - _unwind_class guard((pointer_type)value_space); - pointer_type dp = reinterpret_cast<pointer_type>(value_space); - pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space)); - for(size_t i = 0; i < N; ++i, ++dp, ++sp) { - (void) new(dp) value_type(*sp); - ++(guard.already_built); - } - guard.space = NULL; - } - - void CopyTo(void* newSpace) const override { - (void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor - } - - ~Wrapper() { - // have to destroy explicitly in reverse order - pointer_type vp = reinterpret_cast<pointer_type>(&value_space); - for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type(); - } -}; - -// given a tuple, return the type of the element that has the maximum alignment requirement. -// Given a tuple and that type, return the number of elements of the object with the max -// alignment requirement that is at least as big as the largest object in the tuple. - -template<bool, class T1, class T2> struct pick_one; -template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; }; -template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; }; - -template< template<class> class Selector, typename T1, typename T2 > -struct pick_max { - typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type; -}; - -template<typename T> struct size_of { static const int value = sizeof(T); }; - -template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max { - typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType; - typedef typename std::tuple_element<N-1, Tuple>::type ThisType; - typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type; -}; - -template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> { - typedef typename std::tuple_element<0, Tuple>::type type; -}; - -// is the specified type included in a tuple? -template<class Q, size_t N, class Tuple> -struct is_element_of { - typedef typename std::tuple_element<N-1, Tuple>::type T_i; - static const bool value = std::is_same<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value; -}; - -template<class Q, class Tuple> -struct is_element_of<Q,0,Tuple> { - typedef typename std::tuple_element<0, Tuple>::type T_i; - static const bool value = std::is_same<Q,T_i>::value; -}; - -// allow the construction of types that are listed tuple. If a disallowed type -// construction is written, a method involving this type is created. 
The -// type has no definition, so a syntax error is generated. -template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple; - -template<typename T, bool BUILD_IT> struct do_if; -template<typename T> -struct do_if<T, true> { - static void construct(void *mySpace, const T& x) { - (void) new(mySpace) Wrapper<T>(x); - } -}; -template<typename T> -struct do_if<T, false> { - static void construct(void * /*mySpace*/, const T& x) { - // This method is instantiated when the type T does not match any of the - // element types in the Tuple in variant<Tuple>. - ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x); - } -}; - -// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in -// Wrapper, and how big Wrapper is. -// -// the object can only be tested for type, and a read-only reference can be fetched by cast_to<T>(). - -using tbb::detail::punned_cast; -struct tagged_null_type {}; -template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type, - typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type, - typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type> -class tagged_msg { - typedef std::tuple<T0, T1, T2, T3, T4 - //TODO: Should we reject lists longer than a tuple can hold? - #if __TBB_VARIADIC_MAX >= 6 - , T5 - #endif - #if __TBB_VARIADIC_MAX >= 7 - , T6 - #endif - #if __TBB_VARIADIC_MAX >= 8 - , T7 - #endif - #if __TBB_VARIADIC_MAX >= 9 - , T8 - #endif - #if __TBB_VARIADIC_MAX >= 10 - , T9 - #endif - > Tuple; - -private: - class variant { - static const size_t N = std::tuple_size<Tuple>::value; - typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType; - typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType; - static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1); - static const size_t MaxNElements = MaxNBytes/sizeof(AlignType); - typedef aligned_space<AlignType, MaxNElements> SpaceType; - SpaceType my_space; - static const size_t MaxSize = sizeof(SpaceType); - - public: - variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); } - - template<typename T> - variant( const T& x ) { - do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x); - } - - variant(const variant& other) { - const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space)); - h->CopyTo(&my_space); - } - - // assignment must destroy and re-create the Wrapper type, as there is no way - // to create a Wrapper-to-Wrapper assign even if we find they agree in type. 
- void operator=( const variant& rhs ) { - if(&rhs != this) { - WrapperBase *h = punned_cast<WrapperBase *>(&my_space); - h->~WrapperBase(); - const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space)); - ch->CopyTo(&my_space); - } - } - - template<typename U> - const U& variant_cast_to() const { - const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)); - if(!h) { - throw_exception(exception_id::bad_tagged_msg_cast); - } - return h->value(); - } - template<typename U> - bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != NULL; } - - bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();} - - ~variant() { - WrapperBase *h = punned_cast<WrapperBase *>(&my_space); - h->~WrapperBase(); - } - }; //class variant - - TagType my_tag; - variant my_msg; - -public: - tagged_msg(): my_tag(TagType(~0)), my_msg(){} - - template<typename T, typename R> - tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {} - - template<typename T, typename R, size_t N> - tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {} - - void set_tag(TagType const &index) {my_tag = index;} - TagType tag() const {return my_tag;} - - template<typename V> - const V& cast_to() const {return my_msg.template variant_cast_to<V>();} - - template<typename V> - bool is_a() const {return my_msg.template variant_is_a<V>();} - - bool is_default_constructed() const {return my_msg.variant_is_default_constructed();} -}; //class tagged_msg - -// template to simplify cast and test for tagged_msg in template contexts -template<typename V, typename T> -const V& cast_to(T const &t) { return t.template cast_to<V>(); } - -template<typename V, typename T> -bool is_a(T const &t) { return t.template is_a<V>(); } - -enum op_stat { WAIT = 0, SUCCEEDED, FAILED }; - -#endif /* __TBB__flow_graph_types_impl_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_types_impl_H +#define __TBB__flow_graph_types_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 + +// the change to key_matching (adding a K and KHash template parameter, making it a class) +// means we have to pass this data to the key_matching_port. All the ports have only one +// template parameter, so we have to wrap the following types in a trait: +// +// . K == key_type +// . KHash == hash and compare for Key +// . TtoK == function_body that given an object of T, returns its K +// . T == type accepted by port, and stored in the hash table +// +// The port will have an additional parameter on node construction, which is a function_body +// that accepts a const T& and returns a K which is the field in T which is its K. 
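To make the motivation above concrete: a class template that is constrained to a single type parameter can still be handed a key type, a hasher and a stored type if they are bundled into one trait, which is what KeyTrait below does for the key_matching ports. The sketch that follows is illustrative only, not TBB code; demo_traits, record and demo_port are invented names.

// Illustrative sketch (not TBB code): one trait parameter delivers several types.
#include <cstdio>
#include <functional>
#include <string>

struct record {
    int         id;
    std::string payload;
};

// The single "trait" parameter carries everything the port needs to know.
struct demo_traits {
    typedef int            K;       // key type
    typedef std::hash<int> KHash;   // how to hash the key
    typedef record         T;       // what the port actually stores
};

template <typename Traits>
class demo_port {
    typedef typename Traits::K     key_type;
    typedef typename Traits::T     value_type;
    typedef typename Traits::KHash hasher;
public:
    void put(const value_type& v) {
        key_type k = v.id;           // a real port would use a user-supplied key extractor (TtoK)
        std::printf("stored item with key %d, hash bucket hint %zu\n",
                    k, hasher{}(k) % 8);
    }
};

int main() {
    demo_port<demo_traits> port;     // one template argument, three types delivered
    port.put(record{42, "hello"});
}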
+template<typename Kp, typename KHashp, typename Tp> +struct KeyTrait { + typedef Kp K; + typedef Tp T; + typedef type_to_key_function_body<T,K> TtoK; + typedef KHashp KHash; +}; + +// wrap each element of a tuple in a template, and make a tuple of the result. +template<int N, template<class> class PT, typename TypeTuple> +struct wrap_tuple_elements; + +// A wrapper that generates the traits needed for each port of a key-matching join, +// and the type of the tuple of input ports. +template<int N, template<class> class PT, typename KeyTraits, typename TypeTuple> +struct wrap_key_tuple_elements; + +template<int N, template<class> class PT, typename... Args> +struct wrap_tuple_elements<N, PT, std::tuple<Args...> >{ + typedef typename std::tuple<PT<Args>... > type; +}; + +template<int N, template<class> class PT, typename KeyTraits, typename... Args> +struct wrap_key_tuple_elements<N, PT, KeyTraits, std::tuple<Args...> > { + typedef typename KeyTraits::key_type K; + typedef typename KeyTraits::hash_compare_type KHash; + typedef typename std::tuple<PT<KeyTrait<K, KHash, Args> >... > type; +}; + +template< int... S > class sequence {}; + +template< int N, int... S > +struct make_sequence : make_sequence < N - 1, N - 1, S... > {}; + +template< int... S > +struct make_sequence < 0, S... > { + typedef sequence<S...> type; +}; + +//! type mimicking std::pair but with trailing fill to ensure each element of an array +//* will have the correct alignment +template<typename T1, typename T2, size_t REM> +struct type_plus_align { + char first[sizeof(T1)]; + T2 second; + char fill1[REM]; +}; + +template<typename T1, typename T2> +struct type_plus_align<T1,T2,0> { + char first[sizeof(T1)]; + T2 second; +}; + +template<class U> struct alignment_of { + typedef struct { char t; U padded; } test_alignment; + static const size_t value = sizeof(test_alignment) - sizeof(U); +}; + +// T1, T2 are actual types stored. The space defined for T1 in the type returned +// is a char array of the correct size. Type T2 should be trivially-constructible, +// T1 must be explicitly managed. +template<typename T1, typename T2> +struct aligned_pair { + static const size_t t1_align = alignment_of<T1>::value; + static const size_t t2_align = alignment_of<T2>::value; + typedef type_plus_align<T1, T2, 0 > just_pair; + static const size_t max_align = t1_align < t2_align ? t2_align : t1_align; + static const size_t extra_bytes = sizeof(just_pair) % max_align; + static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0; +public: + typedef type_plus_align<T1,T2,remainder> type; +}; // aligned_pair + +// support for variant type +// type we use when we're not storing a value +struct default_constructed { }; + +// type which contains another type, tests for what type is contained, and references to it. +// Wrapper<T> +// void CopyTo( void *newSpace) : builds a Wrapper<T> copy of itself in newSpace + +// struct to allow us to copy and test the type of objects +struct WrapperBase { + virtual ~WrapperBase() {} + virtual void CopyTo(void* /*newSpace*/) const = 0; +}; + +// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be +// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be +// examined via value(), but not modified. 
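An illustrative reduction of the WrapperBase / Wrapper<T> scheme described above (not TBB code): a virtual CopyTo() rebuilds the wrapped value into another buffer by placement new, and dynamic_cast answers which T is stored. The real code adds exception-unwind guards, punned_cast and tuple-derived buffer sizing; the sketch below keeps only the core idea, and small_variant is an invented holder sized for a single std::string.

// Illustrative sketch (not TBB code) of buffer-based type erasure with CopyTo().
#include <cstdio>
#include <new>
#include <string>

struct WrapperBase {
    virtual ~WrapperBase() {}
    virtual void CopyTo(void* space) const = 0;
};

template <typename T>
struct Wrapper : WrapperBase {
    T value;
    explicit Wrapper(const T& v) : value(v) {}
    void CopyTo(void* space) const override { ::new (space) Wrapper(value); }
};

class small_variant {
    alignas(Wrapper<std::string>) unsigned char space[sizeof(Wrapper<std::string>)];
    WrapperBase*       base()       { return reinterpret_cast<WrapperBase*>(space); }
    const WrapperBase* base() const { return reinterpret_cast<const WrapperBase*>(space); }
public:
    template <typename T>
    explicit small_variant(const T& v) {
        static_assert(sizeof(Wrapper<T>) <= sizeof(space), "type too large for the buffer");
        ::new (static_cast<void*>(space)) Wrapper<T>(v);
    }
    small_variant(const small_variant& other) { other.base()->CopyTo(space); }
    small_variant& operator=(const small_variant&) = delete;
    ~small_variant() { base()->~WrapperBase(); }

    template <typename T>
    bool is_a() const { return dynamic_cast<const Wrapper<T>*>(base()) != nullptr; }
    template <typename T>
    const T& cast_to() const {            // throws std::bad_cast on a type mismatch
        return dynamic_cast<const Wrapper<T>&>(*base()).value;
    }
};

int main() {
    small_variant v(std::string("hello"));
    small_variant copy(v);                // copied via the virtual CopyTo()
    std::printf("is string: %d, is int: %d\n",
                int(copy.is_a<std::string>()), int(copy.is_a<int>()));
    std::printf("value: %s\n", copy.cast_to<std::string>().c_str());
}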
+template<typename T> +struct Wrapper: public WrapperBase { + typedef T value_type; + typedef T* pointer_type; +private: + T value_space; +public: + const value_type &value() const { return value_space; } + +private: + Wrapper(); + + // on exception will ensure the Wrapper will contain only a trivially-constructed object + struct _unwind_space { + pointer_type space; + _unwind_space(pointer_type p) : space(p) {} + ~_unwind_space() { + if(space) (void) new (space) Wrapper<default_constructed>(default_constructed()); + } + }; +public: + explicit Wrapper( const T& other ) : value_space(other) { } + explicit Wrapper(const Wrapper& other) = delete; + + void CopyTo(void* newSpace) const override { + _unwind_space guard((pointer_type)newSpace); + (void) new(newSpace) Wrapper(value_space); + guard.space = NULL; + } + ~Wrapper() { } +}; + +// specialization for array objects +template<typename T, size_t N> +struct Wrapper<T[N]> : public WrapperBase { + typedef T value_type; + typedef T* pointer_type; + // space must be untyped. + typedef T ArrayType[N]; +private: + // The space is not of type T[N] because when copy-constructing, it would be + // default-initialized and then copied to in some fashion, resulting in two + // constructions and one destruction per element. If the type is char[ ], we + // placement new into each element, resulting in one construction per element. + static const size_t space_size = sizeof(ArrayType) / sizeof(char); + char value_space[space_size]; + + + // on exception will ensure the already-built objects will be destructed + // (the value_space is a char array, so it is already trivially-destructible.) + struct _unwind_class { + pointer_type space; + int already_built; + _unwind_class(pointer_type p) : space(p), already_built(0) {} + ~_unwind_class() { + if(space) { + for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type(); + (void) new(space) Wrapper<default_constructed>(default_constructed()); + } + } + }; +public: + const ArrayType &value() const { + char *vp = const_cast<char *>(value_space); + return reinterpret_cast<ArrayType &>(*vp); + } + +private: + Wrapper(); +public: + // have to explicitly construct because other decays to a const value_type* + explicit Wrapper(const ArrayType& other) { + _unwind_class guard((pointer_type)value_space); + pointer_type vp = reinterpret_cast<pointer_type>(&value_space); + for(size_t i = 0; i < N; ++i ) { + (void) new(vp++) value_type(other[i]); + ++(guard.already_built); + } + guard.space = NULL; + } + explicit Wrapper(const Wrapper& other) : WrapperBase() { + // we have to do the heavy lifting to copy contents + _unwind_class guard((pointer_type)value_space); + pointer_type dp = reinterpret_cast<pointer_type>(value_space); + pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space)); + for(size_t i = 0; i < N; ++i, ++dp, ++sp) { + (void) new(dp) value_type(*sp); + ++(guard.already_built); + } + guard.space = NULL; + } + + void CopyTo(void* newSpace) const override { + (void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor + } + + ~Wrapper() { + // have to destroy explicitly in reverse order + pointer_type vp = reinterpret_cast<pointer_type>(&value_space); + for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type(); + } +}; + +// given a tuple, return the type of the element that has the maximum alignment requirement. 
+// Given a tuple and that type, return the number of elements of the object with the max +// alignment requirement that is at least as big as the largest object in the tuple. + +template<bool, class T1, class T2> struct pick_one; +template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; }; +template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; }; + +template< template<class> class Selector, typename T1, typename T2 > +struct pick_max { + typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type; +}; + +template<typename T> struct size_of { static const int value = sizeof(T); }; + +template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max { + typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType; + typedef typename std::tuple_element<N-1, Tuple>::type ThisType; + typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type; +}; + +template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> { + typedef typename std::tuple_element<0, Tuple>::type type; +}; + +// is the specified type included in a tuple? +template<class Q, size_t N, class Tuple> +struct is_element_of { + typedef typename std::tuple_element<N-1, Tuple>::type T_i; + static const bool value = std::is_same<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value; +}; + +template<class Q, class Tuple> +struct is_element_of<Q,0,Tuple> { + typedef typename std::tuple_element<0, Tuple>::type T_i; + static const bool value = std::is_same<Q,T_i>::value; +}; + +// allow the construction of types that are listed tuple. If a disallowed type +// construction is written, a method involving this type is created. The +// type has no definition, so a syntax error is generated. +template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple; + +template<typename T, bool BUILD_IT> struct do_if; +template<typename T> +struct do_if<T, true> { + static void construct(void *mySpace, const T& x) { + (void) new(mySpace) Wrapper<T>(x); + } +}; +template<typename T> +struct do_if<T, false> { + static void construct(void * /*mySpace*/, const T& x) { + // This method is instantiated when the type T does not match any of the + // element types in the Tuple in variant<Tuple>. + ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x); + } +}; + +// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in +// Wrapper, and how big Wrapper is. +// +// the object can only be tested for type, and a read-only reference can be fetched by cast_to<T>(). + +using tbb::detail::punned_cast; +struct tagged_null_type {}; +template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type, + typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type, + typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type> +class tagged_msg { + typedef std::tuple<T0, T1, T2, T3, T4 + //TODO: Should we reject lists longer than a tuple can hold? 
+ #if __TBB_VARIADIC_MAX >= 6 + , T5 + #endif + #if __TBB_VARIADIC_MAX >= 7 + , T6 + #endif + #if __TBB_VARIADIC_MAX >= 8 + , T7 + #endif + #if __TBB_VARIADIC_MAX >= 9 + , T8 + #endif + #if __TBB_VARIADIC_MAX >= 10 + , T9 + #endif + > Tuple; + +private: + class variant { + static const size_t N = std::tuple_size<Tuple>::value; + typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType; + typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType; + static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1); + static const size_t MaxNElements = MaxNBytes/sizeof(AlignType); + typedef aligned_space<AlignType, MaxNElements> SpaceType; + SpaceType my_space; + static const size_t MaxSize = sizeof(SpaceType); + + public: + variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); } + + template<typename T> + variant( const T& x ) { + do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x); + } + + variant(const variant& other) { + const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space)); + h->CopyTo(&my_space); + } + + // assignment must destroy and re-create the Wrapper type, as there is no way + // to create a Wrapper-to-Wrapper assign even if we find they agree in type. + void operator=( const variant& rhs ) { + if(&rhs != this) { + WrapperBase *h = punned_cast<WrapperBase *>(&my_space); + h->~WrapperBase(); + const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space)); + ch->CopyTo(&my_space); + } + } + + template<typename U> + const U& variant_cast_to() const { + const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)); + if(!h) { + throw_exception(exception_id::bad_tagged_msg_cast); + } + return h->value(); + } + template<typename U> + bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != NULL; } + + bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();} + + ~variant() { + WrapperBase *h = punned_cast<WrapperBase *>(&my_space); + h->~WrapperBase(); + } + }; //class variant + + TagType my_tag; + variant my_msg; + +public: + tagged_msg(): my_tag(TagType(~0)), my_msg(){} + + template<typename T, typename R> + tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {} + + template<typename T, typename R, size_t N> + tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {} + + void set_tag(TagType const &index) {my_tag = index;} + TagType tag() const {return my_tag;} + + template<typename V> + const V& cast_to() const {return my_msg.template variant_cast_to<V>();} + + template<typename V> + bool is_a() const {return my_msg.template variant_is_a<V>();} + + bool is_default_constructed() const {return my_msg.variant_is_default_constructed();} +}; //class tagged_msg + +// template to simplify cast and test for tagged_msg in template contexts +template<typename V, typename T> +const V& cast_to(T const &t) { return t.template cast_to<V>(); } + +template<typename V, typename T> +bool is_a(T const &t) { return t.template is_a<V>(); } + +enum op_stat { WAIT = 0, SUCCEEDED, FAILED }; + +#endif /* __TBB__flow_graph_types_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h index 20cbd96c06..1c38b0dc2d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h +++ 
b/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h @@ -1,127 +1,127 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__hash_compare_H -#define __TBB_detail__hash_compare_H - -#include <functional> - -#include "_containers_helpers.h" - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Key, typename Hash, typename KeyEqual> -class hash_compare { - using is_transparent_hash = has_transparent_key_equal<Key, Hash, KeyEqual>; -public: - using hasher = Hash; - using key_equal = typename is_transparent_hash::type; - - hash_compare() = default; - hash_compare( hasher hash, key_equal equal ) : my_hasher(hash), my_equal(equal) {} - - std::size_t operator()( const Key& key ) const { - return std::size_t(my_hasher(key)); - } - - bool operator()( const Key& key1, const Key& key2 ) const { - return my_equal(key1, key2); - } - - template <typename K, typename = typename std::enable_if<is_transparent_hash::value, K>::type> - std::size_t operator()( const K& key ) const { - return std::size_t(my_hasher(key)); - } - - template <typename K1, typename K2, typename = typename std::enable_if<is_transparent_hash::value, K1>::type> - bool operator()( const K1& key1, const K2& key2 ) const { - return my_equal(key1, key2); - } - - hasher hash_function() const { - return my_hasher; - } - - key_equal key_eq() const { - return my_equal; - } - - -private: - hasher my_hasher; - key_equal my_equal; -}; // class hash_compare - -//! 
hash_compare that is default argument for concurrent_hash_map -template <typename Key> -class tbb_hash_compare { -public: - std::size_t hash( const Key& a ) const { return my_hash_func(a); } - bool equal( const Key& a, const Key& b ) const { return my_key_equal(a, b); } -private: - std::hash<Key> my_hash_func; - std::equal_to<Key> my_key_equal; -}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#if TBB_DEFINE_STD_HASH_SPECIALIZATIONS - -namespace std { - -template <typename T, typename U> -struct hash<std::pair<T, U>> { -public: - std::size_t operator()( const std::pair<T, U>& p ) const { - return first_hash(p.first) ^ second_hash(p.second); - } - -private: - std::hash<T> first_hash; - std::hash<U> second_hash; -}; // struct hash<std::pair> - -// Apple clang and MSVC defines their own specializations for std::hash<std::basic_string<T, Traits, Alloc>> -#if !(_LIBCPP_VERSION) && !(_CPPLIB_VER) - -template <typename CharT, typename Traits, typename Allocator> -struct hash<std::basic_string<CharT, Traits, Allocator>> { -public: - std::size_t operator()( const std::basic_string<CharT, Traits, Allocator>& s ) const { - std::size_t h = 0; - for ( const CharT* c = s.c_str(); *c; ++c ) { - h = h * hash_multiplier ^ char_hash(*c); - } - return h; - } - -private: - static constexpr std::size_t hash_multiplier = tbb::detail::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value; - - std::hash<CharT> char_hash; -}; // struct hash<std::basic_string> - -#endif // !(_LIBCPP_VERSION || _CPPLIB_VER) - -} // namespace std - -#endif // TBB_DEFINE_STD_HASH_SPECIALIZATIONS - -#endif // __TBB_detail__hash_compare_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__hash_compare_H +#define __TBB_detail__hash_compare_H + +#include <functional> + +#include "_containers_helpers.h" + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename Hash, typename KeyEqual> +class hash_compare { + using is_transparent_hash = has_transparent_key_equal<Key, Hash, KeyEqual>; +public: + using hasher = Hash; + using key_equal = typename is_transparent_hash::type; + + hash_compare() = default; + hash_compare( hasher hash, key_equal equal ) : my_hasher(hash), my_equal(equal) {} + + std::size_t operator()( const Key& key ) const { + return std::size_t(my_hasher(key)); + } + + bool operator()( const Key& key1, const Key& key2 ) const { + return my_equal(key1, key2); + } + + template <typename K, typename = typename std::enable_if<is_transparent_hash::value, K>::type> + std::size_t operator()( const K& key ) const { + return std::size_t(my_hasher(key)); + } + + template <typename K1, typename K2, typename = typename std::enable_if<is_transparent_hash::value, K1>::type> + bool operator()( const K1& key1, const K2& key2 ) const { + return my_equal(key1, key2); + } + + hasher hash_function() const { + return my_hasher; + } + + key_equal key_eq() const { + return my_equal; + } + + +private: + hasher my_hasher; + key_equal my_equal; +}; // class hash_compare + +//! hash_compare that is default argument for concurrent_hash_map +template <typename Key> +class tbb_hash_compare { +public: + std::size_t hash( const Key& a ) const { return my_hash_func(a); } + bool equal( const Key& a, const Key& b ) const { return my_key_equal(a, b); } +private: + std::hash<Key> my_hash_func; + std::equal_to<Key> my_key_equal; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if TBB_DEFINE_STD_HASH_SPECIALIZATIONS + +namespace std { + +template <typename T, typename U> +struct hash<std::pair<T, U>> { +public: + std::size_t operator()( const std::pair<T, U>& p ) const { + return first_hash(p.first) ^ second_hash(p.second); + } + +private: + std::hash<T> first_hash; + std::hash<U> second_hash; +}; // struct hash<std::pair> + +// Apple clang and MSVC defines their own specializations for std::hash<std::basic_string<T, Traits, Alloc>> +#if !(_LIBCPP_VERSION) && !(_CPPLIB_VER) + +template <typename CharT, typename Traits, typename Allocator> +struct hash<std::basic_string<CharT, Traits, Allocator>> { +public: + std::size_t operator()( const std::basic_string<CharT, Traits, Allocator>& s ) const { + std::size_t h = 0; + for ( const CharT* c = s.c_str(); *c; ++c ) { + h = h * hash_multiplier ^ char_hash(*c); + } + return h; + } + +private: + static constexpr std::size_t hash_multiplier = tbb::detail::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value; + + std::hash<CharT> char_hash; +}; // struct hash<std::basic_string> + +#endif // !(_LIBCPP_VERSION || _CPPLIB_VER) + +} // namespace std + +#endif // TBB_DEFINE_STD_HASH_SPECIALIZATIONS + +#endif // __TBB_detail__hash_compare_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h index 3270da786a..c4aad58dfc 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h @@ -1,366 +1,366 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__machine_H -#define __TBB_detail__machine_H - -#include "_config.h" -#include "_assert.h" - -#include <atomic> -#include <climits> -#include <cstdint> -#include <cstddef> - -#ifdef _MSC_VER -#include <intrin.h> -#pragma intrinsic(__rdtsc) -#endif -#if __TBB_x86_64 || __TBB_x86_32 -#include <immintrin.h> // _mm_pause -#endif -#if (_WIN32 || _WIN64) -#include <float.h> // _control87 -#endif - -#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN -#include <sched.h> // sched_yield -#else -#include <thread> // std::this_thread::yield() -#endif - -namespace tbb { -namespace detail { -inline namespace d0 { - -//-------------------------------------------------------------------------------------------------- -// Yield implementation -//-------------------------------------------------------------------------------------------------- - -#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN -static inline void yield() { - int err = sched_yield(); - __TBB_ASSERT_EX(err == 0, "sched_yiled has failed"); -} -#else -using std::this_thread::yield; -#endif - -//-------------------------------------------------------------------------------------------------- -// atomic_fence implementation -//-------------------------------------------------------------------------------------------------- - -#if (_WIN32 || _WIN64) -#pragma intrinsic(_mm_mfence) -#endif - -static inline void atomic_fence(std::memory_order order) { -#if (_WIN32 || _WIN64) - if (order == std::memory_order_seq_cst || - order == std::memory_order_acq_rel || - order == std::memory_order_acquire || - order == std::memory_order_release ) - { - _mm_mfence(); - return; - } -#endif /*(_WIN32 || _WIN64)*/ - std::atomic_thread_fence(order); -} - -//-------------------------------------------------------------------------------------------------- -// Pause implementation -//-------------------------------------------------------------------------------------------------- - -static inline void machine_pause(int32_t delay) { -#if __TBB_x86_64 || __TBB_x86_32 - while (delay-- > 0) { _mm_pause(); } -#elif __ARM_ARCH_7A__ || __aarch64__ - while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); } -#else /* Generic */ - (void)delay; // suppress without including _template_helpers.h - yield(); -#endif -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// tbb::detail::log2() implementation -//////////////////////////////////////////////////////////////////////////////////////////////////// -// TODO: Use log2p1() function that will be available in C++20 standard - -#if defined(__GNUC__) || defined(__clang__) -namespace gnu_builtins { - inline uintptr_t clz(unsigned int x) { return __builtin_clz(x); } - inline uintptr_t clz(unsigned long int x) { return __builtin_clzl(x); } - inline uintptr_t clz(unsigned long long int x) { return __builtin_clzll(x); } -} -#elif defined(_MSC_VER) -#pragma intrinsic(__TBB_W(_BitScanReverse)) -namespace msvc_intrinsics { - static inline uintptr_t bit_scan_reverse(uintptr_t i) { - unsigned long j; - __TBB_W(_BitScanReverse)( &j, i ); - return j; - } -} 
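The machine_log2 code in this hunk leans on the identity that, for x > 0 and a power-of-two word width, (bits-1) - clz(x) equals (bits-1) XOR clz(x), since clz(x) never exceeds bits-1. A small GCC/Clang-only check of that identity (illustrative, not part of the patch; fixed to 64-bit for clarity):

    #include <cassert>
    #include <cstdint>

    // Index of the most significant set bit, computed the way machine_log2 does it
    // on GCC/Clang: (bits - 1) ^ clz(x). Requires x != 0.
    static unsigned log2_via_clz(std::uint64_t x) {
        return 63u ^ static_cast<unsigned>(__builtin_clzll(x));
    }

    int main() {
        for (std::uint64_t x = 1; x < 100000; ++x) {
            unsigned reference = 0;
            for (std::uint64_t t = x; t >>= 1; ) ++reference;   // naive MSB index
            assert(log2_via_clz(x) == reference);
        }
        return 0;
    }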
-#endif - -template <typename T> -constexpr std::uintptr_t number_of_bits() { - return sizeof(T) * CHAR_BIT; -} - -// logarithm is the index of the most significant non-zero bit -static inline uintptr_t machine_log2(uintptr_t x) { -#if defined(__GNUC__) || defined(__clang__) - // If P is a power of 2 and x<P, then (P-1)-x == (P-1) XOR x - return (number_of_bits<decltype(x)>() - 1) ^ gnu_builtins::clz(x); -#elif defined(_MSC_VER) - return msvc_intrinsics::bit_scan_reverse(x); -#elif __i386__ || __i386 /*for Sun OS*/ || __MINGW32__ - uintptr_t j, i = x; - __asm__("bsr %1,%0" : "=r"(j) : "r"(i)); - return j; -#elif __powerpc__ || __POWERPC__ - #if __TBB_WORDSIZE==8 - __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); - return 63 - static_cast<intptr_t>(x); - #else - __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); - return 31 - static_cast<intptr_t>(x); - #endif /*__TBB_WORDSIZE*/ -#elif __sparc - uint64_t count; - // one hot encode - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - x |= (x >> 32); - // count 1's - __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); - return count - 1; -#else - intptr_t result = 0; - - if( sizeof(x) > 4 && (uintptr_t tmp = x >> 32) ) { x = tmp; result += 32; } - if( uintptr_t tmp = x >> 16 ) { x = tmp; result += 16; } - if( uintptr_t tmp = x >> 8 ) { x = tmp; result += 8; } - if( uintptr_t tmp = x >> 4 ) { x = tmp; result += 4; } - if( uintptr_t tmp = x >> 2 ) { x = tmp; result += 2; } - - return (x & 2) ? result + 1 : result; -#endif -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// tbb::detail::reverse_bits() implementation -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if TBB_USE_CLANG_BITREVERSE_BUILTINS -namespace llvm_builtins { - inline uint8_t builtin_bitreverse(uint8_t x) { return __builtin_bitreverse8 (x); } - inline uint16_t builtin_bitreverse(uint16_t x) { return __builtin_bitreverse16(x); } - inline uint32_t builtin_bitreverse(uint32_t x) { return __builtin_bitreverse32(x); } - inline uint64_t builtin_bitreverse(uint64_t x) { return __builtin_bitreverse64(x); } -} -#else // generic -template<typename T> -struct reverse { - static const T byte_table[256]; -}; - -template<typename T> -const T reverse<T>::byte_table[256] = { - 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, - 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, - 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, - 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, - 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, - 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, - 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, - 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, - 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, - 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, - 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, - 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, - 0x03, 0x83, 0x43, 
0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, - 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, - 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, - 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF -}; - -inline unsigned char reverse_byte(unsigned char src) { - return reverse<unsigned char>::byte_table[src]; -} -#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS - -template<typename T> -T machine_reverse_bits(T src) { -#if TBB_USE_CLANG_BITREVERSE_BUILTINS - return builtin_bitreverse(fixed_width_cast(src)); -#else /* Generic */ - T dst; - unsigned char *original = (unsigned char *) &src; - unsigned char *reversed = (unsigned char *) &dst; - - for ( int i = sizeof(T) - 1; i >= 0; i-- ) { - reversed[i] = reverse_byte( original[sizeof(T) - i - 1] ); - } - - return dst; -#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS -} - -} // inline namespace d0 - -namespace d1 { - -#if (_WIN32 || _WIN64) -// API to retrieve/update FPU control setting -#define __TBB_CPU_CTL_ENV_PRESENT 1 -struct cpu_ctl_env { - unsigned int x87cw{}; -#if (__TBB_x86_64) - // Changing the infinity mode or the floating-point precision is not supported on x64. - // The attempt causes an assertion. See - // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2 - static constexpr unsigned int X87CW_CONTROL_MASK = _MCW_DN | _MCW_EM | _MCW_RC; -#else - static constexpr unsigned int X87CW_CONTROL_MASK = ~0U; -#endif -#if (__TBB_x86_32 || __TBB_x86_64) - unsigned int mxcsr{}; - static constexpr unsigned int MXCSR_CONTROL_MASK = ~0x3fu; /* all except last six status bits */ -#endif - - bool operator!=( const cpu_ctl_env& ctl ) const { - return -#if (__TBB_x86_32 || __TBB_x86_64) - mxcsr != ctl.mxcsr || -#endif - x87cw != ctl.x87cw; - } - void get_env() { - x87cw = _control87(0, 0); -#if (__TBB_x86_32 || __TBB_x86_64) - mxcsr = _mm_getcsr(); -#endif - } - void set_env() const { - _control87(x87cw, X87CW_CONTROL_MASK); -#if (__TBB_x86_32 || __TBB_x86_64) - _mm_setcsr(mxcsr & MXCSR_CONTROL_MASK); -#endif - } -}; -#elif (__TBB_x86_32 || __TBB_x86_64) -// API to retrieve/update FPU control setting -#define __TBB_CPU_CTL_ENV_PRESENT 1 -struct cpu_ctl_env { - int mxcsr{}; - short x87cw{}; - static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */ - - bool operator!=(const cpu_ctl_env& ctl) const { - return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; - } - void get_env() { - __asm__ __volatile__( - "stmxcsr %0\n\t" - "fstcw %1" - : "=m"(mxcsr), "=m"(x87cw) - ); - mxcsr &= MXCSR_CONTROL_MASK; - } - void set_env() const { - __asm__ __volatile__( - "ldmxcsr %0\n\t" - "fldcw %1" - : : "m"(mxcsr), "m"(x87cw) - ); - } -}; -#endif - -} // namespace d1 - -} // namespace detail -} // namespace tbb - -#if !__TBB_CPU_CTL_ENV_PRESENT -#include <fenv.h> - -#include <cstring> - -namespace tbb { -namespace detail { - -namespace r1 { -void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); -void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); -} // namespace r1 - -namespace d1 { - -class cpu_ctl_env { - fenv_t *my_fenv_ptr; -public: - cpu_ctl_env() : my_fenv_ptr(NULL) {} - ~cpu_ctl_env() { - if ( my_fenv_ptr ) - r1::cache_aligned_deallocate( (void*)my_fenv_ptr ); - } - // It is possible not to copy memory but just to copy pointers but the following issues should be addressed: - // 1. 
The arena lifetime and the context lifetime are independent; - // 2. The user is allowed to recapture different FPU settings to context so 'current FPU settings' inside - // dispatch loop may become invalid. - // But do we really want to improve the fenv implementation? It seems to be better to replace the fenv implementation - // with a platform specific implementation. - cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(NULL) { - *this = src; - } - cpu_ctl_env& operator=( const cpu_ctl_env &src ) { - __TBB_ASSERT( src.my_fenv_ptr, NULL ); - if ( !my_fenv_ptr ) - my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); - *my_fenv_ptr = *src.my_fenv_ptr; - return *this; - } - bool operator!=( const cpu_ctl_env &ctl ) const { - __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); - __TBB_ASSERT( ctl.my_fenv_ptr, "cpu_ctl_env is not initialized." ); - return std::memcmp( (void*)my_fenv_ptr, (void*)ctl.my_fenv_ptr, sizeof(fenv_t) ); - } - void get_env () { - if ( !my_fenv_ptr ) - my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); - fegetenv( my_fenv_ptr ); - } - const cpu_ctl_env& set_env () const { - __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); - fesetenv( my_fenv_ptr ); - return *this; - } -}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif /* !__TBB_CPU_CTL_ENV_PRESENT */ - -#endif // __TBB_detail__machine_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__machine_H +#define __TBB_detail__machine_H + +#include "_config.h" +#include "_assert.h" + +#include <atomic> +#include <climits> +#include <cstdint> +#include <cstddef> + +#ifdef _MSC_VER +#include <intrin.h> +#pragma intrinsic(__rdtsc) +#endif +#if __TBB_x86_64 || __TBB_x86_32 +#include <immintrin.h> // _mm_pause +#endif +#if (_WIN32 || _WIN64) +#include <float.h> // _control87 +#endif + +#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN +#include <sched.h> // sched_yield +#else +#include <thread> // std::this_thread::yield() +#endif + +namespace tbb { +namespace detail { +inline namespace d0 { + +//-------------------------------------------------------------------------------------------------- +// Yield implementation +//-------------------------------------------------------------------------------------------------- + +#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN +static inline void yield() { + int err = sched_yield(); + __TBB_ASSERT_EX(err == 0, "sched_yiled has failed"); +} +#else +using std::this_thread::yield; +#endif + +//-------------------------------------------------------------------------------------------------- +// atomic_fence implementation +//-------------------------------------------------------------------------------------------------- + +#if (_WIN32 || _WIN64) +#pragma intrinsic(_mm_mfence) +#endif + +static inline void atomic_fence(std::memory_order order) { +#if (_WIN32 || _WIN64) + if (order == std::memory_order_seq_cst || + order == std::memory_order_acq_rel || + order == std::memory_order_acquire || + order == std::memory_order_release ) + { + _mm_mfence(); + return; + } +#endif /*(_WIN32 || _WIN64)*/ + std::atomic_thread_fence(order); +} + +//-------------------------------------------------------------------------------------------------- +// Pause implementation +//-------------------------------------------------------------------------------------------------- + +static inline void machine_pause(int32_t delay) { +#if __TBB_x86_64 || __TBB_x86_32 + while (delay-- > 0) { _mm_pause(); } +#elif __ARM_ARCH_7A__ || __aarch64__ + while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); } +#else /* Generic */ + (void)delay; // suppress without including _template_helpers.h + yield(); +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// tbb::detail::log2() implementation +//////////////////////////////////////////////////////////////////////////////////////////////////// +// TODO: Use log2p1() function that will be available in C++20 standard + +#if defined(__GNUC__) || defined(__clang__) +namespace gnu_builtins { + inline uintptr_t clz(unsigned int x) { return __builtin_clz(x); } + inline uintptr_t clz(unsigned long int x) { return __builtin_clzl(x); } + inline uintptr_t clz(unsigned long long int x) { return __builtin_clzll(x); } +} +#elif defined(_MSC_VER) +#pragma intrinsic(__TBB_W(_BitScanReverse)) +namespace msvc_intrinsics { + static inline uintptr_t bit_scan_reverse(uintptr_t i) { + unsigned long j; + __TBB_W(_BitScanReverse)( &j, i ); + return j; + } +} +#endif + +template <typename T> +constexpr std::uintptr_t number_of_bits() { + return sizeof(T) * CHAR_BIT; +} + +// logarithm is the index of the most significant non-zero bit +static inline uintptr_t machine_log2(uintptr_t x) { +#if defined(__GNUC__) || defined(__clang__) + // If P is a power of 2 and x<P, then (P-1)-x == (P-1) XOR x + return (number_of_bits<decltype(x)>() - 1) ^ gnu_builtins::clz(x); 
+#elif defined(_MSC_VER) + return msvc_intrinsics::bit_scan_reverse(x); +#elif __i386__ || __i386 /*for Sun OS*/ || __MINGW32__ + uintptr_t j, i = x; + __asm__("bsr %1,%0" : "=r"(j) : "r"(i)); + return j; +#elif __powerpc__ || __POWERPC__ + #if __TBB_WORDSIZE==8 + __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); + return 63 - static_cast<intptr_t>(x); + #else + __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); + return 31 - static_cast<intptr_t>(x); + #endif /*__TBB_WORDSIZE*/ +#elif __sparc + uint64_t count; + // one hot encode + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + // count 1's + __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); + return count - 1; +#else + intptr_t result = 0; + + if( sizeof(x) > 4 && (uintptr_t tmp = x >> 32) ) { x = tmp; result += 32; } + if( uintptr_t tmp = x >> 16 ) { x = tmp; result += 16; } + if( uintptr_t tmp = x >> 8 ) { x = tmp; result += 8; } + if( uintptr_t tmp = x >> 4 ) { x = tmp; result += 4; } + if( uintptr_t tmp = x >> 2 ) { x = tmp; result += 2; } + + return (x & 2) ? result + 1 : result; +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// tbb::detail::reverse_bits() implementation +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if TBB_USE_CLANG_BITREVERSE_BUILTINS +namespace llvm_builtins { + inline uint8_t builtin_bitreverse(uint8_t x) { return __builtin_bitreverse8 (x); } + inline uint16_t builtin_bitreverse(uint16_t x) { return __builtin_bitreverse16(x); } + inline uint32_t builtin_bitreverse(uint32_t x) { return __builtin_bitreverse32(x); } + inline uint64_t builtin_bitreverse(uint64_t x) { return __builtin_bitreverse64(x); } +} +#else // generic +template<typename T> +struct reverse { + static const T byte_table[256]; +}; + +template<typename T> +const T reverse<T>::byte_table[256] = { + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, + 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, + 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, + 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, + 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, + 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, + 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, + 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, + 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, + 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, + 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, + 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, + 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, + 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, + 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, + 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; + +inline unsigned char 
reverse_byte(unsigned char src) { + return reverse<unsigned char>::byte_table[src]; +} +#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS + +template<typename T> +T machine_reverse_bits(T src) { +#if TBB_USE_CLANG_BITREVERSE_BUILTINS + return builtin_bitreverse(fixed_width_cast(src)); +#else /* Generic */ + T dst; + unsigned char *original = (unsigned char *) &src; + unsigned char *reversed = (unsigned char *) &dst; + + for ( int i = sizeof(T) - 1; i >= 0; i-- ) { + reversed[i] = reverse_byte( original[sizeof(T) - i - 1] ); + } + + return dst; +#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS +} + +} // inline namespace d0 + +namespace d1 { + +#if (_WIN32 || _WIN64) +// API to retrieve/update FPU control setting +#define __TBB_CPU_CTL_ENV_PRESENT 1 +struct cpu_ctl_env { + unsigned int x87cw{}; +#if (__TBB_x86_64) + // Changing the infinity mode or the floating-point precision is not supported on x64. + // The attempt causes an assertion. See + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2 + static constexpr unsigned int X87CW_CONTROL_MASK = _MCW_DN | _MCW_EM | _MCW_RC; +#else + static constexpr unsigned int X87CW_CONTROL_MASK = ~0U; +#endif +#if (__TBB_x86_32 || __TBB_x86_64) + unsigned int mxcsr{}; + static constexpr unsigned int MXCSR_CONTROL_MASK = ~0x3fu; /* all except last six status bits */ +#endif + + bool operator!=( const cpu_ctl_env& ctl ) const { + return +#if (__TBB_x86_32 || __TBB_x86_64) + mxcsr != ctl.mxcsr || +#endif + x87cw != ctl.x87cw; + } + void get_env() { + x87cw = _control87(0, 0); +#if (__TBB_x86_32 || __TBB_x86_64) + mxcsr = _mm_getcsr(); +#endif + } + void set_env() const { + _control87(x87cw, X87CW_CONTROL_MASK); +#if (__TBB_x86_32 || __TBB_x86_64) + _mm_setcsr(mxcsr & MXCSR_CONTROL_MASK); +#endif + } +}; +#elif (__TBB_x86_32 || __TBB_x86_64) +// API to retrieve/update FPU control setting +#define __TBB_CPU_CTL_ENV_PRESENT 1 +struct cpu_ctl_env { + int mxcsr{}; + short x87cw{}; + static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */ + + bool operator!=(const cpu_ctl_env& ctl) const { + return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; + } + void get_env() { + __asm__ __volatile__( + "stmxcsr %0\n\t" + "fstcw %1" + : "=m"(mxcsr), "=m"(x87cw) + ); + mxcsr &= MXCSR_CONTROL_MASK; + } + void set_env() const { + __asm__ __volatile__( + "ldmxcsr %0\n\t" + "fldcw %1" + : : "m"(mxcsr), "m"(x87cw) + ); + } +}; +#endif + +} // namespace d1 + +} // namespace detail +} // namespace tbb + +#if !__TBB_CPU_CTL_ENV_PRESENT +#include <fenv.h> + +#include <cstring> + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); +} // namespace r1 + +namespace d1 { + +class cpu_ctl_env { + fenv_t *my_fenv_ptr; +public: + cpu_ctl_env() : my_fenv_ptr(NULL) {} + ~cpu_ctl_env() { + if ( my_fenv_ptr ) + r1::cache_aligned_deallocate( (void*)my_fenv_ptr ); + } + // It is possible not to copy memory but just to copy pointers but the following issues should be addressed: + // 1. The arena lifetime and the context lifetime are independent; + // 2. The user is allowed to recapture different FPU settings to context so 'current FPU settings' inside + // dispatch loop may become invalid. + // But do we really want to improve the fenv implementation? It seems to be better to replace the fenv implementation + // with a platform specific implementation. 
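The cpu_ctl_env variants above (x87/MXCSR on Windows and x86, fenv_t elsewhere) all support the same capture/compare/restore pattern used to propagate FPU settings. The same pattern expressed with the standard <cfenv> API that the fallback wraps; this is an illustration, not TBB code, and the rounding-mode change is just an example perturbation:

    #include <cfenv>

    int main() {
        std::fenv_t saved;
        std::fegetenv(&saved);                    // capture, as cpu_ctl_env::get_env() does
        const int original_rounding = std::fegetround();

        std::fesetround(FE_UPWARD);               // some work perturbs the FP control state

        std::fesetenv(&saved);                    // restore, as cpu_ctl_env::set_env() does
        return std::fegetround() == original_rounding ? 0 : 1;
    }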
+ cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(NULL) { + *this = src; + } + cpu_ctl_env& operator=( const cpu_ctl_env &src ) { + __TBB_ASSERT( src.my_fenv_ptr, NULL ); + if ( !my_fenv_ptr ) + my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); + *my_fenv_ptr = *src.my_fenv_ptr; + return *this; + } + bool operator!=( const cpu_ctl_env &ctl ) const { + __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); + __TBB_ASSERT( ctl.my_fenv_ptr, "cpu_ctl_env is not initialized." ); + return std::memcmp( (void*)my_fenv_ptr, (void*)ctl.my_fenv_ptr, sizeof(fenv_t) ); + } + void get_env () { + if ( !my_fenv_ptr ) + my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); + fegetenv( my_fenv_ptr ); + } + const cpu_ctl_env& set_env () const { + __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); + fesetenv( my_fenv_ptr ); + return *this; + } +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* !__TBB_CPU_CTL_ENV_PRESENT */ + +#endif // __TBB_detail__machine_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h index 2e1df30931..325af0a680 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h @@ -1,24 +1,24 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// All public entities of the OneAPI Spec are available under oneapi namespace - -// Define tbb namespace first as it might not be known yet -namespace tbb {} - -namespace oneapi { -namespace tbb = ::tbb; -} +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// All public entities of the OneAPI Spec are available under oneapi namespace + +// Define tbb namespace first as it might not be known yet +namespace tbb {} + +namespace oneapi { +namespace tbb = ::tbb; +} diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h index 265be07555..d669c1f721 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h @@ -1,162 +1,162 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__node_handle_H -#define __TBB_detail__node_handle_H - -#include "_allocator_traits.h" -#include "_assert.h" - -namespace tbb { -namespace detail { -namespace d1 { - -// A structure to access private node handle methods in internal TBB classes -// Regular friend declaration is not convenient because classes which use node handle -// can be placed in the different versioning namespaces. -struct node_handle_accessor { - template <typename NodeHandleType> - static typename NodeHandleType::node* get_node_ptr( NodeHandleType& nh ) { - return nh.get_node_ptr(); - } - - template <typename NodeHandleType> - static NodeHandleType construct( typename NodeHandleType::node* node_ptr ) { - return NodeHandleType{node_ptr}; - } - - template <typename NodeHandleType> - static void deactivate( NodeHandleType& nh ) { - nh.deactivate(); - } -}; // struct node_handle_accessor - -template<typename Value, typename Node, typename Allocator> -class node_handle_base { -public: - using allocator_type = Allocator; -protected: - using node = Node; - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; -public: - - node_handle_base() : my_node(nullptr), my_allocator() {} - node_handle_base(node_handle_base&& nh) : my_node(nh.my_node), - my_allocator(std::move(nh.my_allocator)) { - nh.my_node = nullptr; - } - - __TBB_nodiscard bool empty() const { return my_node == nullptr; } - explicit operator bool() const { return my_node != nullptr; } - - ~node_handle_base() { internal_destroy(); } - - node_handle_base& operator=( node_handle_base&& nh ) { - internal_destroy(); - my_node = nh.my_node; - move_assign_allocators(my_allocator, nh.my_allocator); - nh.deactivate(); - return *this; - } - - void swap( node_handle_base& nh ) { - using std::swap; - swap(my_node, nh.my_node); - swap_allocators(my_allocator, nh.my_allocator); - } - - allocator_type get_allocator() const { - return my_allocator; - } - -protected: - node_handle_base( node* n ) : my_node(n) {} - - void internal_destroy() { - if(my_node != nullptr) { - allocator_traits_type::destroy(my_allocator, my_node->storage()); - typename allocator_traits_type::template rebind_alloc<node> node_allocator(my_allocator); - node_allocator.deallocate(my_node, 1); - } - } - - node* get_node_ptr() { return my_node; } - - void deactivate() { my_node = nullptr; } - - node* my_node; - allocator_type my_allocator; -}; - -// node handle for maps -template<typename Key, typename Value, typename Node, typename Allocator> -class node_handle : public node_handle_base<Value, Node, Allocator> { - using base_type = node_handle_base<Value, Node, Allocator>; -public: - using key_type = Key; - using mapped_type = typename Value::second_type; - using allocator_type = typename base_type::allocator_type; - - node_handle() = default; - - key_type& key() const { - __TBB_ASSERT(!this->empty(), "Cannot get key from the empty node_type object"); - return *const_cast<key_type*>(&(this->my_node->value().first)); - } - - mapped_type& mapped() const { - __TBB_ASSERT(!this->empty(), "Cannot get mapped value from the empty node_type object"); - return 
this->my_node->value().second; - } - -private: - friend struct node_handle_accessor; - - node_handle( typename base_type::node* n ) : base_type(n) {} -}; // class node_handle - -// node handle for sets -template<typename Key, typename Node, typename Allocator> -class node_handle<Key, Key, Node, Allocator> : public node_handle_base<Key, Node, Allocator> { - using base_type = node_handle_base<Key, Node, Allocator>; -public: - using value_type = Key; - using allocator_type = typename base_type::allocator_type; - - node_handle() = default; - - value_type& value() const { - __TBB_ASSERT(!this->empty(), "Cannot get value from the empty node_type object"); - return *const_cast<value_type*>(&(this->my_node->value())); - } - -private: - friend struct node_handle_accessor; - - node_handle( typename base_type::node* n ) : base_type(n) {} -}; // class node_handle - -template <typename Key, typename Value, typename Node, typename Allocator> -void swap( node_handle<Key, Value, Node, Allocator>& lhs, - node_handle<Key, Value, Node, Allocator>& rhs ) { - return lhs.swap(rhs); -} - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__node_handle_H +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__node_handle_H +#define __TBB_detail__node_handle_H + +#include "_allocator_traits.h" +#include "_assert.h" + +namespace tbb { +namespace detail { +namespace d1 { + +// A structure to access private node handle methods in internal TBB classes +// Regular friend declaration is not convenient because classes which use node handle +// can be placed in the different versioning namespaces. 
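For context on the node_handle classes in this hunk: they back the node_type of the concurrent associative containers, which expose them through unsafe_extract() and a node-accepting insert(), mirroring C++17 node handles. A minimal sketch, not part of the patch, assuming oneTBB's concurrent_unordered_map interface:

    #include <string>
    #include <utility>
    #include <oneapi/tbb/concurrent_unordered_map.h>

    int main() {
        using Map = tbb::concurrent_unordered_map<int, std::string>;
        Map source{{1, "one"}, {2, "two"}};
        Map target;

        Map::node_type nh = source.unsafe_extract(1);  // take ownership of the node
        if (!nh.empty()) {
            nh.mapped() += "!";                        // key()/mapped() as defined above
            target.insert(std::move(nh));              // re-link the node, no element copy
        }
        return target.count(1) == 1 ? 0 : 1;
    }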
+struct node_handle_accessor { + template <typename NodeHandleType> + static typename NodeHandleType::node* get_node_ptr( NodeHandleType& nh ) { + return nh.get_node_ptr(); + } + + template <typename NodeHandleType> + static NodeHandleType construct( typename NodeHandleType::node* node_ptr ) { + return NodeHandleType{node_ptr}; + } + + template <typename NodeHandleType> + static void deactivate( NodeHandleType& nh ) { + nh.deactivate(); + } +}; // struct node_handle_accessor + +template<typename Value, typename Node, typename Allocator> +class node_handle_base { +public: + using allocator_type = Allocator; +protected: + using node = Node; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; +public: + + node_handle_base() : my_node(nullptr), my_allocator() {} + node_handle_base(node_handle_base&& nh) : my_node(nh.my_node), + my_allocator(std::move(nh.my_allocator)) { + nh.my_node = nullptr; + } + + __TBB_nodiscard bool empty() const { return my_node == nullptr; } + explicit operator bool() const { return my_node != nullptr; } + + ~node_handle_base() { internal_destroy(); } + + node_handle_base& operator=( node_handle_base&& nh ) { + internal_destroy(); + my_node = nh.my_node; + move_assign_allocators(my_allocator, nh.my_allocator); + nh.deactivate(); + return *this; + } + + void swap( node_handle_base& nh ) { + using std::swap; + swap(my_node, nh.my_node); + swap_allocators(my_allocator, nh.my_allocator); + } + + allocator_type get_allocator() const { + return my_allocator; + } + +protected: + node_handle_base( node* n ) : my_node(n) {} + + void internal_destroy() { + if(my_node != nullptr) { + allocator_traits_type::destroy(my_allocator, my_node->storage()); + typename allocator_traits_type::template rebind_alloc<node> node_allocator(my_allocator); + node_allocator.deallocate(my_node, 1); + } + } + + node* get_node_ptr() { return my_node; } + + void deactivate() { my_node = nullptr; } + + node* my_node; + allocator_type my_allocator; +}; + +// node handle for maps +template<typename Key, typename Value, typename Node, typename Allocator> +class node_handle : public node_handle_base<Value, Node, Allocator> { + using base_type = node_handle_base<Value, Node, Allocator>; +public: + using key_type = Key; + using mapped_type = typename Value::second_type; + using allocator_type = typename base_type::allocator_type; + + node_handle() = default; + + key_type& key() const { + __TBB_ASSERT(!this->empty(), "Cannot get key from the empty node_type object"); + return *const_cast<key_type*>(&(this->my_node->value().first)); + } + + mapped_type& mapped() const { + __TBB_ASSERT(!this->empty(), "Cannot get mapped value from the empty node_type object"); + return this->my_node->value().second; + } + +private: + friend struct node_handle_accessor; + + node_handle( typename base_type::node* n ) : base_type(n) {} +}; // class node_handle + +// node handle for sets +template<typename Key, typename Node, typename Allocator> +class node_handle<Key, Key, Node, Allocator> : public node_handle_base<Key, Node, Allocator> { + using base_type = node_handle_base<Key, Node, Allocator>; +public: + using value_type = Key; + using allocator_type = typename base_type::allocator_type; + + node_handle() = default; + + value_type& value() const { + __TBB_ASSERT(!this->empty(), "Cannot get value from the empty node_type object"); + return *const_cast<value_type*>(&(this->my_node->value())); + } + +private: + friend struct node_handle_accessor; + + node_handle( typename base_type::node* n ) : 
base_type(n) {} +}; // class node_handle + +template <typename Key, typename Value, typename Node, typename Allocator> +void swap( node_handle<Key, Value, Node, Allocator>& lhs, + node_handle<Key, Value, Node, Allocator>& rhs ) { + return lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__node_handle_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h index 95a4d3dc96..a1ce306c14 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h @@ -1,453 +1,453 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_filters_H -#define __TBB_parallel_filters_H - -#include "_config.h" -#include "_task.h" -#include "_pipeline_filters_deduction.h" -#include "../tbb_allocator.h" - -#include <cstddef> -#include <cstdint> - -namespace tbb { -namespace detail { - -namespace d1 { -class base_filter; -} - -namespace r1 { -void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); -class pipeline; -class stage_task; -class input_buffer; -} - -namespace d1 { -class filter_node; - -//! A stage in a pipeline. -/** @ingroup algorithms */ -class base_filter{ -private: - //! Value used to mark "not in pipeline" - static base_filter* not_in_pipeline() { return reinterpret_cast<base_filter*>(std::intptr_t(-1)); } -public: - //! The lowest bit 0 is for parallel vs serial - static constexpr unsigned int filter_is_serial = 0x1; - - //! 2nd bit distinguishes ordered vs unordered filters. - static constexpr unsigned int filter_is_out_of_order = 0x1<<1; - - //! 3rd bit marks input filters emitting small objects - static constexpr unsigned int filter_may_emit_null = 0x1<<2; - - base_filter(const base_filter&) = delete; - base_filter& operator=(const base_filter&) = delete; - -protected: - explicit base_filter( unsigned int m ) : - next_filter_in_pipeline(not_in_pipeline()), - my_input_buffer(nullptr), - my_filter_mode(m), - my_pipeline(nullptr) - {} - - // signal end-of-input for concrete_filters - void set_end_of_input() { - r1::set_end_of_input(*this); - } - -public: - //! True if filter is serial. - bool is_serial() const { - return bool( my_filter_mode & filter_is_serial ); - } - - //! True if filter must receive stream in order. - bool is_ordered() const { - return (my_filter_mode & filter_is_serial) && !(my_filter_mode & filter_is_out_of_order); - } - - //! true if an input filter can emit null - bool object_may_be_null() { - return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null; - } - - //! Operate on an item from the input stream, and return item for output stream. - /** Returns nullptr if filter is a sink. */ - virtual void* operator()( void* item ) = 0; - - //! Destroy filter. - virtual ~base_filter() {}; - - //! Destroys item if pipeline was cancelled. - /** Required to prevent memory leaks. 
- Note it can be called concurrently even for serial filters.*/ - virtual void finalize( void* /*item*/ ) {} - -private: - //! Pointer to next filter in the pipeline. - base_filter* next_filter_in_pipeline; - - //! Buffer for incoming tokens, or nullptr if not required. - /** The buffer is required if the filter is serial. */ - r1::input_buffer* my_input_buffer; - - friend class r1::stage_task; - friend class r1::pipeline; - friend void r1::set_end_of_input(d1::base_filter&); - - //! Storage for filter mode and dynamically checked implementation version. - const unsigned int my_filter_mode; - - //! Pointer to the pipeline. - r1::pipeline* my_pipeline; -}; - -template<typename Body, typename InputType, typename OutputType > -class concrete_filter; - -//! input_filter control to signal end-of-input for parallel_pipeline -class flow_control { - bool is_pipeline_stopped = false; - flow_control() = default; - template<typename Body, typename InputType, typename OutputType > friend class concrete_filter; - template<typename Output> friend class input_node; -public: - void stop() { is_pipeline_stopped = true; } -}; - -// Emulate std::is_trivially_copyable (false positives not allowed, false negatives suboptimal but safe). -#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT -template<typename T> using tbb_trivially_copyable = std::is_trivially_copyable<T>; -#else -template<typename T> struct tbb_trivially_copyable { enum { value = false }; }; -template<typename T> struct tbb_trivially_copyable < T* > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < bool > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < char > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < signed char > { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned char > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < short > { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned short > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < int > { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned int > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < long > { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned long > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < long long> { enum { value = true }; }; -template<> struct tbb_trivially_copyable <unsigned long long> { enum { value = true }; }; -template<> struct tbb_trivially_copyable < float > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < double > { enum { value = true }; }; -template<> struct tbb_trivially_copyable < long double > { enum { value = true }; }; -#endif // __TBB_CPP11_TYPE_PROPERTIES_PRESENT - -template<typename T> -struct use_allocator { - static constexpr bool value = sizeof(T) > sizeof(void *) || !tbb_trivially_copyable<T>::value; -}; - -// A helper class to customize how a type is passed between filters. 
-// Usage: token_helper<T, use_allocator<T>::value> -template<typename T, bool Allocate> struct token_helper; - -// using tbb_allocator -template<typename T> -struct token_helper<T, true> { - using pointer = T*; - using value_type = T; - static pointer create_token(value_type && source) { - return new (r1::allocate_memory(sizeof(T))) T(std::move(source)); - } - static value_type & token(pointer & t) { return *t; } - static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } - static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } - static void destroy_token(pointer token) { - token->~value_type(); - r1::deallocate_memory(token); - } -}; - -// pointer specialization -template<typename T> -struct token_helper<T*, false> { - using pointer = T*; - using value_type = T*; - static pointer create_token(const value_type & source) { return source; } - static value_type & token(pointer & t) { return t; } - static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } - static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } - static void destroy_token( pointer /*token*/) {} -}; - -// converting type to and from void*, passing objects directly -template<typename T> -struct token_helper<T, false> { - typedef union { - T actual_value; - void * void_overlay; - } type_to_void_ptr_map; - using pointer = T; // not really a pointer in this case. - using value_type = T; - static pointer create_token(const value_type & source) { return source; } - static value_type & token(pointer & t) { return t; } - static void * cast_to_void_ptr(pointer ref) { - type_to_void_ptr_map mymap; - mymap.void_overlay = nullptr; - mymap.actual_value = ref; - return mymap.void_overlay; - } - static pointer cast_from_void_ptr(void * ref) { - type_to_void_ptr_map mymap; - mymap.void_overlay = ref; - return mymap.actual_value; - } - static void destroy_token( pointer /*token*/) {} -}; - -// intermediate -template<typename InputType, typename OutputType, typename Body> -class concrete_filter: public base_filter { - const Body& my_body; - using input_helper = token_helper<InputType, use_allocator<InputType >::value>; - using input_pointer = typename input_helper::pointer; - using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; - using output_pointer = typename output_helper::pointer; - - void* operator()(void* input) override { - input_pointer temp_input = input_helper::cast_from_void_ptr(input); - output_pointer temp_output = output_helper::create_token(my_body(std::move(input_helper::token(temp_input)))); - input_helper::destroy_token(temp_input); - return output_helper::cast_to_void_ptr(temp_output); - } - - void finalize(void * input) override { - input_pointer temp_input = input_helper::cast_from_void_ptr(input); - input_helper::destroy_token(temp_input); - } - -public: - concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} -}; - -// input -template<typename OutputType, typename Body> -class concrete_filter<void, OutputType, Body>: public base_filter { - const Body& my_body; - using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; - using output_pointer = typename output_helper::pointer; - - void* operator()(void*) override { - flow_control control; - output_pointer temp_output = output_helper::create_token(my_body(control)); - if(control.is_pipeline_stopped) { - output_helper::destroy_token(temp_output); - set_end_of_input(); - 
return nullptr; - } - return output_helper::cast_to_void_ptr(temp_output); - } - -public: - concrete_filter(unsigned int m, const Body& body) : - base_filter(m | filter_may_emit_null), - my_body(body) - {} -}; - -// output -template<typename InputType, typename Body> -class concrete_filter<InputType, void, Body>: public base_filter { - const Body& my_body; - using input_helper = token_helper<InputType, use_allocator<InputType >::value>; - using input_pointer = typename input_helper::pointer; - - void* operator()(void* input) override { - input_pointer temp_input = input_helper::cast_from_void_ptr(input); - my_body(std::move(input_helper::token(temp_input))); - input_helper::destroy_token(temp_input); - return nullptr; - } - void finalize(void* input) override { - input_pointer temp_input = input_helper::cast_from_void_ptr(input); - input_helper::destroy_token(temp_input); - } - -public: - concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} -}; - -template<typename Body> -class concrete_filter<void, void, Body>: public base_filter { - const Body& my_body; - - void* operator()(void*) override { - flow_control control; - my_body(control); - void* output = control.is_pipeline_stopped ? nullptr : (void*)(std::intptr_t)-1; - return output; - } -public: - concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} -}; - -class filter_node_ptr { - filter_node * my_node; - -public: - filter_node_ptr() : my_node(nullptr) {} - filter_node_ptr(filter_node *); - ~filter_node_ptr(); - filter_node_ptr(const filter_node_ptr &); - filter_node_ptr(filter_node_ptr &&); - void operator=(filter_node *); - void operator=(const filter_node_ptr &); - void operator=(filter_node_ptr &&); - filter_node& operator*() const; - operator bool() const; -}; - -//! Abstract base class that represents a node in a parse tree underlying a filter class. -/** These nodes are always heap-allocated and can be shared by filter objects. */ -class filter_node { - /** Count must be atomic because it is hidden state for user, but might be shared by threads. */ - std::atomic<std::intptr_t> ref_count; -public: - filter_node_ptr left; - filter_node_ptr right; -protected: - filter_node() : ref_count(0), left(nullptr), right(nullptr) { -#ifdef __TBB_TEST_FILTER_NODE_COUNT - ++(__TBB_TEST_FILTER_NODE_COUNT); -#endif - } -public: - filter_node(const filter_node_ptr& x, const filter_node_ptr& y) : filter_node(){ - left = x; - right = y; - } - filter_node(const filter_node&) = delete; - filter_node& operator=(const filter_node&) = delete; - - //! Add concrete_filter to pipeline - virtual base_filter* create_filter() const { - __TBB_ASSERT(false, "method of non-leaf was called"); - return nullptr; - } - - //! Increment reference count - void add_ref() { ref_count.fetch_add(1, std::memory_order_relaxed); } - - //! Decrement reference count and delete if it becomes zero. 
- void remove_ref() { - __TBB_ASSERT(ref_count>0,"ref_count underflow"); - if( ref_count.fetch_sub(1, std::memory_order_relaxed) == 1 ) { - this->~filter_node(); - r1::deallocate_memory(this); - } - } - - virtual ~filter_node() { -#ifdef __TBB_TEST_FILTER_NODE_COUNT - --(__TBB_TEST_FILTER_NODE_COUNT); -#endif - } -}; - -inline filter_node_ptr::filter_node_ptr(filter_node * nd) : my_node(nd) { - if (my_node) { - my_node->add_ref(); - } -} - -inline filter_node_ptr::~filter_node_ptr() { - if (my_node) { - my_node->remove_ref(); - } -} - -inline filter_node_ptr::filter_node_ptr(const filter_node_ptr & rhs) : my_node(rhs.my_node) { - if (my_node) { - my_node->add_ref(); - } -} - -inline filter_node_ptr::filter_node_ptr(filter_node_ptr && rhs) : my_node(rhs.my_node) { - rhs.my_node = nullptr; -} - -inline void filter_node_ptr::operator=(filter_node * rhs) { - // Order of operations below carefully chosen so that reference counts remain correct - // in unlikely event that remove_ref throws exception. - filter_node* old = my_node; - my_node = rhs; - if (my_node) { - my_node->add_ref(); - } - if (old) { - old->remove_ref(); - } -} - -inline void filter_node_ptr::operator=(const filter_node_ptr & rhs) { - *this = rhs.my_node; -} - -inline void filter_node_ptr::operator=(filter_node_ptr && rhs) { - filter_node* old = my_node; - my_node = rhs.my_node; - rhs.my_node = nullptr; - if (old) { - old->remove_ref(); - } -} - -inline filter_node& filter_node_ptr::operator*() const{ - __TBB_ASSERT(my_node,"NULL node is used"); - return *my_node; -} - -inline filter_node_ptr::operator bool() const { - return my_node != nullptr; -} - -//! Node in parse tree representing result of make_filter. -template<typename InputType, typename OutputType, typename Body> -class filter_node_leaf: public filter_node { - const unsigned int my_mode; - const Body my_body; - base_filter* create_filter() const override { - return new(r1::allocate_memory(sizeof(concrete_filter<InputType, OutputType, Body>))) concrete_filter<InputType, OutputType, Body>(my_mode,my_body); - } -public: - filter_node_leaf( unsigned int m, const Body& b ) : my_mode(m), my_body(b) {} -}; - - -template <typename Body, typename Input = typename body_types<decltype(&Body::operator())>::input_type> -using filter_input = typename std::conditional<std::is_same<Input, flow_control>::value, void, Input>::type; - -template <typename Body> -using filter_output = typename body_types<decltype(&Body::operator())>::output_type; - -} // namespace d1 -} // namespace detail -} // namespace tbb - - -#endif /* __TBB_parallel_filters_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_parallel_filters_H +#define __TBB_parallel_filters_H + +#include "_config.h" +#include "_task.h" +#include "_pipeline_filters_deduction.h" +#include "../tbb_allocator.h" + +#include <cstddef> +#include <cstdint> + +namespace tbb { +namespace detail { + +namespace d1 { +class base_filter; +} + +namespace r1 { +void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); +class pipeline; +class stage_task; +class input_buffer; +} + +namespace d1 { +class filter_node; + +//! A stage in a pipeline. +/** @ingroup algorithms */ +class base_filter{ +private: + //! Value used to mark "not in pipeline" + static base_filter* not_in_pipeline() { return reinterpret_cast<base_filter*>(std::intptr_t(-1)); } +public: + //! The lowest bit 0 is for parallel vs serial + static constexpr unsigned int filter_is_serial = 0x1; + + //! 2nd bit distinguishes ordered vs unordered filters. + static constexpr unsigned int filter_is_out_of_order = 0x1<<1; + + //! 3rd bit marks input filters emitting small objects + static constexpr unsigned int filter_may_emit_null = 0x1<<2; + + base_filter(const base_filter&) = delete; + base_filter& operator=(const base_filter&) = delete; + +protected: + explicit base_filter( unsigned int m ) : + next_filter_in_pipeline(not_in_pipeline()), + my_input_buffer(nullptr), + my_filter_mode(m), + my_pipeline(nullptr) + {} + + // signal end-of-input for concrete_filters + void set_end_of_input() { + r1::set_end_of_input(*this); + } + +public: + //! True if filter is serial. + bool is_serial() const { + return bool( my_filter_mode & filter_is_serial ); + } + + //! True if filter must receive stream in order. + bool is_ordered() const { + return (my_filter_mode & filter_is_serial) && !(my_filter_mode & filter_is_out_of_order); + } + + //! true if an input filter can emit null + bool object_may_be_null() { + return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null; + } + + //! Operate on an item from the input stream, and return item for output stream. + /** Returns nullptr if filter is a sink. */ + virtual void* operator()( void* item ) = 0; + + //! Destroy filter. + virtual ~base_filter() {}; + + //! Destroys item if pipeline was cancelled. + /** Required to prevent memory leaks. + Note it can be called concurrently even for serial filters.*/ + virtual void finalize( void* /*item*/ ) {} + +private: + //! Pointer to next filter in the pipeline. + base_filter* next_filter_in_pipeline; + + //! Buffer for incoming tokens, or nullptr if not required. + /** The buffer is required if the filter is serial. */ + r1::input_buffer* my_input_buffer; + + friend class r1::stage_task; + friend class r1::pipeline; + friend void r1::set_end_of_input(d1::base_filter&); + + //! Storage for filter mode and dynamically checked implementation version. + const unsigned int my_filter_mode; + + //! Pointer to the pipeline. + r1::pipeline* my_pipeline; +}; + +template<typename Body, typename InputType, typename OutputType > +class concrete_filter; + +//! input_filter control to signal end-of-input for parallel_pipeline +class flow_control { + bool is_pipeline_stopped = false; + flow_control() = default; + template<typename Body, typename InputType, typename OutputType > friend class concrete_filter; + template<typename Output> friend class input_node; +public: + void stop() { is_pipeline_stopped = true; } +}; + +// Emulate std::is_trivially_copyable (false positives not allowed, false negatives suboptimal but safe). 
+#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT +template<typename T> using tbb_trivially_copyable = std::is_trivially_copyable<T>; +#else +template<typename T> struct tbb_trivially_copyable { enum { value = false }; }; +template<typename T> struct tbb_trivially_copyable < T* > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < bool > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < signed char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < short > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned short > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < int > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned int > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned long > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long long> { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned long long> { enum { value = true }; }; +template<> struct tbb_trivially_copyable < float > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < double > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long double > { enum { value = true }; }; +#endif // __TBB_CPP11_TYPE_PROPERTIES_PRESENT + +template<typename T> +struct use_allocator { + static constexpr bool value = sizeof(T) > sizeof(void *) || !tbb_trivially_copyable<T>::value; +}; + +// A helper class to customize how a type is passed between filters. +// Usage: token_helper<T, use_allocator<T>::value> +template<typename T, bool Allocate> struct token_helper; + +// using tbb_allocator +template<typename T> +struct token_helper<T, true> { + using pointer = T*; + using value_type = T; + static pointer create_token(value_type && source) { + return new (r1::allocate_memory(sizeof(T))) T(std::move(source)); + } + static value_type & token(pointer & t) { return *t; } + static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } + static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } + static void destroy_token(pointer token) { + token->~value_type(); + r1::deallocate_memory(token); + } +}; + +// pointer specialization +template<typename T> +struct token_helper<T*, false> { + using pointer = T*; + using value_type = T*; + static pointer create_token(const value_type & source) { return source; } + static value_type & token(pointer & t) { return t; } + static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } + static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } + static void destroy_token( pointer /*token*/) {} +}; + +// converting type to and from void*, passing objects directly +template<typename T> +struct token_helper<T, false> { + typedef union { + T actual_value; + void * void_overlay; + } type_to_void_ptr_map; + using pointer = T; // not really a pointer in this case. 
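The use_allocator/token_helper split above decides how a token crosses the void* boundary between pipeline stages: a trivially copyable token no larger than void* is packed into the pointer slot itself, anything else is heap-allocated via tbb_allocator. A compile-time restatement of that rule (illustrative types; assumes typical 32/64-bit targets):

    #include <string>
    #include <type_traits>

    // Same predicate as use_allocator<T>, inverted: true when the token travels
    // inside the void* slot rather than through an allocation.
    template <typename T>
    struct passed_in_place {
        static constexpr bool value =
            !(sizeof(T) > sizeof(void*) || !std::is_trivially_copyable<T>::value);
    };

    static_assert(passed_in_place<int>::value,          "small and trivially copyable");
    static_assert(passed_in_place<const char*>::value,  "pointers are forwarded as-is");
    static_assert(!passed_in_place<std::string>::value, "not trivially copyable -> tbb_allocator");

    struct BigToken { double payload[8]; };              // 64 bytes, wider than any void*
    static_assert(!passed_in_place<BigToken>::value,     "larger than void* -> tbb_allocator");

    int main() { return 0; }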
+ using value_type = T; + static pointer create_token(const value_type & source) { return source; } + static value_type & token(pointer & t) { return t; } + static void * cast_to_void_ptr(pointer ref) { + type_to_void_ptr_map mymap; + mymap.void_overlay = nullptr; + mymap.actual_value = ref; + return mymap.void_overlay; + } + static pointer cast_from_void_ptr(void * ref) { + type_to_void_ptr_map mymap; + mymap.void_overlay = ref; + return mymap.actual_value; + } + static void destroy_token( pointer /*token*/) {} +}; + +// intermediate +template<typename InputType, typename OutputType, typename Body> +class concrete_filter: public base_filter { + const Body& my_body; + using input_helper = token_helper<InputType, use_allocator<InputType >::value>; + using input_pointer = typename input_helper::pointer; + using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; + using output_pointer = typename output_helper::pointer; + + void* operator()(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + output_pointer temp_output = output_helper::create_token(my_body(std::move(input_helper::token(temp_input)))); + input_helper::destroy_token(temp_input); + return output_helper::cast_to_void_ptr(temp_output); + } + + void finalize(void * input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + input_helper::destroy_token(temp_input); + } + +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +// input +template<typename OutputType, typename Body> +class concrete_filter<void, OutputType, Body>: public base_filter { + const Body& my_body; + using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; + using output_pointer = typename output_helper::pointer; + + void* operator()(void*) override { + flow_control control; + output_pointer temp_output = output_helper::create_token(my_body(control)); + if(control.is_pipeline_stopped) { + output_helper::destroy_token(temp_output); + set_end_of_input(); + return nullptr; + } + return output_helper::cast_to_void_ptr(temp_output); + } + +public: + concrete_filter(unsigned int m, const Body& body) : + base_filter(m | filter_may_emit_null), + my_body(body) + {} +}; + +// output +template<typename InputType, typename Body> +class concrete_filter<InputType, void, Body>: public base_filter { + const Body& my_body; + using input_helper = token_helper<InputType, use_allocator<InputType >::value>; + using input_pointer = typename input_helper::pointer; + + void* operator()(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + my_body(std::move(input_helper::token(temp_input))); + input_helper::destroy_token(temp_input); + return nullptr; + } + void finalize(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + input_helper::destroy_token(temp_input); + } + +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +template<typename Body> +class concrete_filter<void, void, Body>: public base_filter { + const Body& my_body; + + void* operator()(void*) override { + flow_control control; + my_body(control); + void* output = control.is_pipeline_stopped ? 
nullptr : (void*)(std::intptr_t)-1; + return output; + } +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +class filter_node_ptr { + filter_node * my_node; + +public: + filter_node_ptr() : my_node(nullptr) {} + filter_node_ptr(filter_node *); + ~filter_node_ptr(); + filter_node_ptr(const filter_node_ptr &); + filter_node_ptr(filter_node_ptr &&); + void operator=(filter_node *); + void operator=(const filter_node_ptr &); + void operator=(filter_node_ptr &&); + filter_node& operator*() const; + operator bool() const; +}; + +//! Abstract base class that represents a node in a parse tree underlying a filter class. +/** These nodes are always heap-allocated and can be shared by filter objects. */ +class filter_node { + /** Count must be atomic because it is hidden state for user, but might be shared by threads. */ + std::atomic<std::intptr_t> ref_count; +public: + filter_node_ptr left; + filter_node_ptr right; +protected: + filter_node() : ref_count(0), left(nullptr), right(nullptr) { +#ifdef __TBB_TEST_FILTER_NODE_COUNT + ++(__TBB_TEST_FILTER_NODE_COUNT); +#endif + } +public: + filter_node(const filter_node_ptr& x, const filter_node_ptr& y) : filter_node(){ + left = x; + right = y; + } + filter_node(const filter_node&) = delete; + filter_node& operator=(const filter_node&) = delete; + + //! Add concrete_filter to pipeline + virtual base_filter* create_filter() const { + __TBB_ASSERT(false, "method of non-leaf was called"); + return nullptr; + } + + //! Increment reference count + void add_ref() { ref_count.fetch_add(1, std::memory_order_relaxed); } + + //! Decrement reference count and delete if it becomes zero. + void remove_ref() { + __TBB_ASSERT(ref_count>0,"ref_count underflow"); + if( ref_count.fetch_sub(1, std::memory_order_relaxed) == 1 ) { + this->~filter_node(); + r1::deallocate_memory(this); + } + } + + virtual ~filter_node() { +#ifdef __TBB_TEST_FILTER_NODE_COUNT + --(__TBB_TEST_FILTER_NODE_COUNT); +#endif + } +}; + +inline filter_node_ptr::filter_node_ptr(filter_node * nd) : my_node(nd) { + if (my_node) { + my_node->add_ref(); + } +} + +inline filter_node_ptr::~filter_node_ptr() { + if (my_node) { + my_node->remove_ref(); + } +} + +inline filter_node_ptr::filter_node_ptr(const filter_node_ptr & rhs) : my_node(rhs.my_node) { + if (my_node) { + my_node->add_ref(); + } +} + +inline filter_node_ptr::filter_node_ptr(filter_node_ptr && rhs) : my_node(rhs.my_node) { + rhs.my_node = nullptr; +} + +inline void filter_node_ptr::operator=(filter_node * rhs) { + // Order of operations below carefully chosen so that reference counts remain correct + // in unlikely event that remove_ref throws exception. + filter_node* old = my_node; + my_node = rhs; + if (my_node) { + my_node->add_ref(); + } + if (old) { + old->remove_ref(); + } +} + +inline void filter_node_ptr::operator=(const filter_node_ptr & rhs) { + *this = rhs.my_node; +} + +inline void filter_node_ptr::operator=(filter_node_ptr && rhs) { + filter_node* old = my_node; + my_node = rhs.my_node; + rhs.my_node = nullptr; + if (old) { + old->remove_ref(); + } +} + +inline filter_node& filter_node_ptr::operator*() const{ + __TBB_ASSERT(my_node,"NULL node is used"); + return *my_node; +} + +inline filter_node_ptr::operator bool() const { + return my_node != nullptr; +} + +//! Node in parse tree representing result of make_filter. 
+template<typename InputType, typename OutputType, typename Body> +class filter_node_leaf: public filter_node { + const unsigned int my_mode; + const Body my_body; + base_filter* create_filter() const override { + return new(r1::allocate_memory(sizeof(concrete_filter<InputType, OutputType, Body>))) concrete_filter<InputType, OutputType, Body>(my_mode,my_body); + } +public: + filter_node_leaf( unsigned int m, const Body& b ) : my_mode(m), my_body(b) {} +}; + + +template <typename Body, typename Input = typename body_types<decltype(&Body::operator())>::input_type> +using filter_input = typename std::conditional<std::is_same<Input, flow_control>::value, void, Input>::type; + +template <typename Body> +using filter_output = typename body_types<decltype(&Body::operator())>::output_type; + +} // namespace d1 +} // namespace detail +} // namespace tbb + + +#endif /* __TBB_parallel_filters_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h index 55f94dce00..d6f483c2ea 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h @@ -1,46 +1,46 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__pipeline_filters_deduction_H -#define __TBB__pipeline_filters_deduction_H - -#include "_config.h" -#include <utility> -#include <type_traits> - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename Input, typename Output> -struct declare_fitler_types { - using input_type = typename std::remove_const<typename std::remove_reference<Input>::type>::type; - using output_type = typename std::remove_const<typename std::remove_reference<Output>::type>::type; -}; - -template <typename T> struct body_types; - -template <typename T, typename Input, typename Output> -struct body_types<Output(T::*)(Input) const> : declare_fitler_types<Input, Output> {}; - -template <typename T, typename Input, typename Output> -struct body_types<Output(T::*)(Input)> : declare_fitler_types<Input, Output> {}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB__pipeline_filters_deduction_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
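
The filter_input/filter_output aliases above, together with body_types from the deduction header whose diff follows, peel the parameter and return types off Body::operator() and map a flow_control& parameter to void, which is how an input stage deduces to filter<void, T>. A sketch of what gets deduced for two hypothetical functors (Parse and Produce are made-up names; the internal detail::d1 namespace is spelled out only for illustration):

    #include <string>
    #include <type_traits>
    #include "oneapi/tbb/parallel_pipeline.h"   // pulls in these deduction helpers

    struct Parse   { int operator()(const std::string& line) const { return (int)line.size(); } };
    struct Produce { int operator()(tbb::flow_control& fc) const { fc.stop(); return 0; } };

    namespace d1 = tbb::detail::d1;

    static_assert(std::is_same<d1::filter_input<Parse>,  std::string>::value, "const and & are stripped");
    static_assert(std::is_same<d1::filter_output<Parse>, int>::value,         "return type");
    static_assert(std::is_same<d1::filter_input<Produce>, void>::value,
                  "a flow_control& parameter marks an input stage");
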
+*/ + +#ifndef __TBB__pipeline_filters_deduction_H +#define __TBB__pipeline_filters_deduction_H + +#include "_config.h" +#include <utility> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Input, typename Output> +struct declare_fitler_types { + using input_type = typename std::remove_const<typename std::remove_reference<Input>::type>::type; + using output_type = typename std::remove_const<typename std::remove_reference<Output>::type>::type; +}; + +template <typename T> struct body_types; + +template <typename T, typename Input, typename Output> +struct body_types<Output(T::*)(Input) const> : declare_fitler_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output(T::*)(Input)> : declare_fitler_types<Input, Output> {}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB__pipeline_filters_deduction_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h index 36c4ca84ee..2146d127a4 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h @@ -1,76 +1,76 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__range_common_H -#define __TBB_detail__range_common_H - -#include "_config.h" -#include "_utils.h" - -namespace tbb { -namespace detail { -inline namespace d0 { - -//! Dummy type that distinguishes splitting constructor from copy constructor. -/** - * See description of parallel_for and parallel_reduce for example usages. - * @ingroup algorithms - */ -class split {}; - -//! Type enables transmission of splitting proportion from partitioners to range objects -/** - * In order to make use of such facility Range objects must implement - * splitting constructor with this type passed. 
- */ -class proportional_split : no_assign { -public: - proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { } - - size_t left() const { return my_left; } - size_t right() const { return my_right; } - - // used when range does not support proportional split - explicit operator split() const { return split(); } - -private: - size_t my_left, my_right; -}; - -template <typename Range, typename = void> -struct range_split_object_provider { - template <typename PartitionerSplitType> - static split get( PartitionerSplitType& ) { return split(); } -}; - -template <typename Range> -struct range_split_object_provider<Range, - typename std::enable_if<std::is_constructible<Range, Range&, proportional_split&>::value>::type> { - template <typename PartitionerSplitType> - static PartitionerSplitType& get( PartitionerSplitType& split_obj ) { return split_obj; } -}; - -template <typename Range, typename PartitionerSplitType> -auto get_range_split_object( PartitionerSplitType& split_obj ) --> decltype(range_split_object_provider<Range>::get(split_obj)) { - return range_split_object_provider<Range>::get(split_obj); -} - -} // namespace d0 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__range_common_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__range_common_H +#define __TBB_detail__range_common_H + +#include "_config.h" +#include "_utils.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Dummy type that distinguishes splitting constructor from copy constructor. +/** + * See description of parallel_for and parallel_reduce for example usages. + * @ingroup algorithms + */ +class split {}; + +//! Type enables transmission of splitting proportion from partitioners to range objects +/** + * In order to make use of such facility Range objects must implement + * splitting constructor with this type passed. 
+ */ +class proportional_split : no_assign { +public: + proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { } + + size_t left() const { return my_left; } + size_t right() const { return my_right; } + + // used when range does not support proportional split + explicit operator split() const { return split(); } + +private: + size_t my_left, my_right; +}; + +template <typename Range, typename = void> +struct range_split_object_provider { + template <typename PartitionerSplitType> + static split get( PartitionerSplitType& ) { return split(); } +}; + +template <typename Range> +struct range_split_object_provider<Range, + typename std::enable_if<std::is_constructible<Range, Range&, proportional_split&>::value>::type> { + template <typename PartitionerSplitType> + static PartitionerSplitType& get( PartitionerSplitType& split_obj ) { return split_obj; } +}; + +template <typename Range, typename PartitionerSplitType> +auto get_range_split_object( PartitionerSplitType& split_obj ) +-> decltype(range_split_object_provider<Range>::get(split_obj)) { + return range_split_object_provider<Range>::get(split_obj); +} + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__range_common_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h index 28ef9f042e..97b077993d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h @@ -1,162 +1,162 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__rtm_mutex_impl_H -#define __TBB__rtm_mutex_impl_H - -#include "_assert.h" -#include "_utils.h" -#include "../spin_mutex.h" - -#include "../profiling.h" - -namespace tbb { -namespace detail { -namespace r1 { -struct rtm_mutex_impl; -} -namespace d1 { - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Suppress warning: structure was padded due to alignment specifier - #pragma warning (push) - #pragma warning (disable: 4324) -#endif - -/** A rtm_mutex is an speculation-enabled spin mutex. - It should be used for locking short critical sections where the lock is - contended but the data it protects are not. If zero-initialized, the - mutex is considered unheld. - @ingroup synchronization */ -class alignas(max_nfs_size) rtm_mutex : private spin_mutex { -private: - enum class rtm_state { - rtm_none, - rtm_transacting, - rtm_real - }; -public: - //! Constructors - rtm_mutex() noexcept { - create_itt_sync(this, "tbb::speculative_spin_mutex", ""); - } - - //! Destructor - ~rtm_mutex() = default; - - //! Represents acquisition of a mutex. - class scoped_lock { - public: - friend class rtm_mutex; - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) {} - - //! Acquire lock on given mutex. 
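
proportional_split, shown in both the removed and restored copies of _range_common.h above, is how a partitioner asks a Range to split unevenly: a Range opts in by providing a constructor taking (Range&, proportional_split&), and get_range_split_object silently falls back to a plain split when that constructor is absent. A hedged sketch of a user-defined range honouring the proportion (MyRange is an illustrative name; the example assumes split and proportional_split are re-exported into namespace tbb via blocked_range.h, as in current oneTBB releases):

    #include <cstddef>
    #include "oneapi/tbb/blocked_range.h"   // assumed to re-export tbb::split and tbb::proportional_split

    // Illustrative 1-D index range with both splitting constructors.
    class MyRange {
        std::size_t my_begin, my_end;
    public:
        MyRange(std::size_t b, std::size_t e) : my_begin(b), my_end(e) {}

        // Required Range interface.
        bool empty() const { return my_begin >= my_end; }
        bool is_divisible() const { return my_end - my_begin > 1; }

        // Even split: the new object takes the right half, r keeps the left half.
        MyRange(MyRange& r, tbb::split)
            : my_begin((r.my_begin + r.my_end) / 2), my_end(r.my_end) {
            r.my_end = my_begin;
        }

        // Proportional split: r keeps left() parts, the new object gets right() parts.
        MyRange(MyRange& r, tbb::proportional_split& p) : my_end(r.my_end) {
            std::size_t n = r.my_end - r.my_begin;
            std::size_t left_size = n * p.left() / (p.left() + p.right());
            if (left_size == 0) left_size = 1;   // keep both parts non-empty
            my_begin = r.my_begin + left_size;
            r.my_end = my_begin;
        }
    };
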
- scoped_lock(rtm_mutex& m) : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) { - acquire(m); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if(m_transaction_state != rtm_state::rtm_none) { - release(); - } - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - void acquire(rtm_mutex& m); - - //! Try acquire lock on given mutex. - bool try_acquire(rtm_mutex& m); - - //! Release lock - void release(); - - private: - rtm_mutex* m_mutex; - rtm_state m_transaction_state; - friend r1::rtm_mutex_impl; - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = false; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = false; -private: - friend r1::rtm_mutex_impl; -}; // end of rtm_mutex -} // namespace d1 - -namespace r1 { - //! Internal acquire lock. - // only_speculate == true if we're doing a try_lock, else false. - void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false); - //! Internal try_acquire lock. - bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&); - //! Internal release lock. - void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&); -} // namespace r1 - -namespace d1 { -//! Acquire lock on given mutex. -inline void rtm_mutex::scoped_lock::acquire(rtm_mutex& m) { - __TBB_ASSERT(!m_mutex, "lock is already acquired"); - r1::acquire(m, *this); -} - -//! Try acquire lock on given mutex. -inline bool rtm_mutex::scoped_lock::try_acquire(rtm_mutex& m) { - __TBB_ASSERT(!m_mutex, "lock is already acquired"); - return r1::try_acquire(m, *this); -} - -//! Release lock -inline void rtm_mutex::scoped_lock::release() { - __TBB_ASSERT(m_mutex, "lock is not acquired"); - __TBB_ASSERT(m_transaction_state != rtm_state::rtm_none, "lock is not acquired"); - return r1::release(*this); -} - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning (pop) // 4324 warning -#endif - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(rtm_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(rtm_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif // WIN -#else -inline void set_name(rtm_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(rtm_mutex&, const wchar_t*) {} -#endif // WIN -#endif - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif /* __TBB__rtm_mutex_impl_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB__rtm_mutex_impl_H +#define __TBB__rtm_mutex_impl_H + +#include "_assert.h" +#include "_utils.h" +#include "../spin_mutex.h" + +#include "../profiling.h" + +namespace tbb { +namespace detail { +namespace r1 { +struct rtm_mutex_impl; +} +namespace d1 { + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning (push) + #pragma warning (disable: 4324) +#endif + +/** A rtm_mutex is an speculation-enabled spin mutex. + It should be used for locking short critical sections where the lock is + contended but the data it protects are not. If zero-initialized, the + mutex is considered unheld. + @ingroup synchronization */ +class alignas(max_nfs_size) rtm_mutex : private spin_mutex { +private: + enum class rtm_state { + rtm_none, + rtm_transacting, + rtm_real + }; +public: + //! Constructors + rtm_mutex() noexcept { + create_itt_sync(this, "tbb::speculative_spin_mutex", ""); + } + + //! Destructor + ~rtm_mutex() = default; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + friend class rtm_mutex; + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) {} + + //! Acquire lock on given mutex. + scoped_lock(rtm_mutex& m) : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) { + acquire(m); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if(m_transaction_state != rtm_state::rtm_none) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire(rtm_mutex& m); + + //! Try acquire lock on given mutex. + bool try_acquire(rtm_mutex& m); + + //! Release lock + void release(); + + private: + rtm_mutex* m_mutex; + rtm_state m_transaction_state; + friend r1::rtm_mutex_impl; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; +private: + friend r1::rtm_mutex_impl; +}; // end of rtm_mutex +} // namespace d1 + +namespace r1 { + //! Internal acquire lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false); + //! Internal try_acquire lock. + bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&); + //! Internal release lock. + void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&); +} // namespace r1 + +namespace d1 { +//! Acquire lock on given mutex. +inline void rtm_mutex::scoped_lock::acquire(rtm_mutex& m) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + r1::acquire(m, *this); +} + +//! Try acquire lock on given mutex. +inline bool rtm_mutex::scoped_lock::try_acquire(rtm_mutex& m) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + return r1::try_acquire(m, *this); +} + +//! 
Release lock +inline void rtm_mutex::scoped_lock::release() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + __TBB_ASSERT(m_transaction_state != rtm_state::rtm_none, "lock is not acquired"); + return r1::release(*this); +} + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) // 4324 warning +#endif + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(rtm_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(rtm_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_mutex&, const wchar_t*) {} +#endif // WIN +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__rtm_mutex_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h index b62e86bd0a..0cf64b2dba 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h @@ -1,209 +1,209 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__rtm_rw_mutex_H -#define __TBB_detail__rtm_rw_mutex_H - -#include "_assert.h" -#include "_utils.h" -#include "../spin_rw_mutex.h" - -#include <atomic> - -namespace tbb { -namespace detail { - -namespace r1 { -struct rtm_rw_mutex_impl; -} - -namespace d1 { - -constexpr std::size_t speculation_granularity = 64; -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Suppress warning: structure was padded due to alignment specifier - #pragma warning (push) - #pragma warning (disable: 4324) -#endif - -//! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and writer-preference -/** @ingroup synchronization */ -class alignas(max_nfs_size) rtm_rw_mutex : private spin_rw_mutex { - friend struct r1::rtm_rw_mutex_impl; -private: - enum class rtm_type { - rtm_not_in_mutex, - rtm_transacting_reader, - rtm_transacting_writer, - rtm_real_reader, - rtm_real_writer - }; -public: - //! Constructors - rtm_rw_mutex() noexcept : write_flag(false) { - create_itt_sync(this, "tbb::speculative_spin_rw_mutex", ""); - } - - //! Destructor - ~rtm_rw_mutex() = default; - - //! Represents acquisition of a mutex. - class scoped_lock { - friend struct r1::rtm_rw_mutex_impl; - public: - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) {} - - //! Acquire lock on given mutex. - scoped_lock(rtm_rw_mutex& m, bool write = true) : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) { - acquire(m, write); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if(m_transaction_state != rtm_type::rtm_not_in_mutex) { - release(); - } - } - - //! 
No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - inline void acquire(rtm_rw_mutex& m, bool write = true); - - //! Try acquire lock on given mutex. - inline bool try_acquire(rtm_rw_mutex& m, bool write = true); - - //! Release lock - inline void release(); - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - inline bool upgrade_to_writer(); - - //! Downgrade writer to become a reader. - inline bool downgrade_to_reader(); - - private: - rtm_rw_mutex* m_mutex; - rtm_type m_transaction_state; - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = true; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = false; - -private: - alignas(speculation_granularity) std::atomic<bool> write_flag; -}; - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning (pop) // 4324 warning -#endif - -} // namespace d1 - -namespace r1 { - //! Internal acquire write lock. - // only_speculate == true if we're doing a try_lock, else false. - void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); - //! Internal acquire read lock. - // only_speculate == true if we're doing a try_lock, else false. - void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); - //! Internal upgrade reader to become a writer. - bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&); - //! Internal downgrade writer to become a reader. - bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&); - //! Internal try_acquire write lock. - bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); - //! Internal try_acquire read lock. - bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); - //! Internal release lock. - void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&); -} - -namespace d1 { -//! Acquire lock on given mutex. -void rtm_rw_mutex::scoped_lock::acquire(rtm_rw_mutex& m, bool write) { - __TBB_ASSERT(!m_mutex, "lock is already acquired"); - if (write) { - r1::acquire_writer(m, *this); - } else { - r1::acquire_reader(m, *this); - } -} - -//! Try acquire lock on given mutex. -bool rtm_rw_mutex::scoped_lock::try_acquire(rtm_rw_mutex& m, bool write) { - __TBB_ASSERT(!m_mutex, "lock is already acquired"); - if (write) { - return r1::try_acquire_writer(m, *this); - } else { - return r1::try_acquire_reader(m, *this); - } -} - -//! Release lock -void rtm_rw_mutex::scoped_lock::release() { - __TBB_ASSERT(m_mutex, "lock is not acquired"); - __TBB_ASSERT(m_transaction_state != rtm_type::rtm_not_in_mutex, "lock is not acquired"); - return r1::release(*this); -} - -//! Upgrade reader to become a writer. -/** Returns whether the upgrade happened without releasing and re-acquiring the lock */ -bool rtm_rw_mutex::scoped_lock::upgrade_to_writer() { - __TBB_ASSERT(m_mutex, "lock is not acquired"); - if (m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer) { - return true; // Already a writer - } - return r1::upgrade(*this); -} - -//! Downgrade writer to become a reader. 
-bool rtm_rw_mutex::scoped_lock::downgrade_to_reader() { - __TBB_ASSERT(m_mutex, "lock is not acquired"); - if (m_transaction_state == rtm_type::rtm_transacting_reader || m_transaction_state == rtm_type::rtm_real_reader) { - return true; // Already a reader - } - return r1::downgrade(*this); -} - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(rtm_rw_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(rtm_rw_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif // WIN -#else -inline void set_name(rtm_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(rtm_rw_mutex&, const wchar_t*) {} -#endif // WIN -#endif - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__rtm_rw_mutex_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__rtm_rw_mutex_H +#define __TBB_detail__rtm_rw_mutex_H + +#include "_assert.h" +#include "_utils.h" +#include "../spin_rw_mutex.h" + +#include <atomic> + +namespace tbb { +namespace detail { + +namespace r1 { +struct rtm_rw_mutex_impl; +} + +namespace d1 { + +constexpr std::size_t speculation_granularity = 64; +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning (push) + #pragma warning (disable: 4324) +#endif + +//! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and writer-preference +/** @ingroup synchronization */ +class alignas(max_nfs_size) rtm_rw_mutex : private spin_rw_mutex { + friend struct r1::rtm_rw_mutex_impl; +private: + enum class rtm_type { + rtm_not_in_mutex, + rtm_transacting_reader, + rtm_transacting_writer, + rtm_real_reader, + rtm_real_writer + }; +public: + //! Constructors + rtm_rw_mutex() noexcept : write_flag(false) { + create_itt_sync(this, "tbb::speculative_spin_rw_mutex", ""); + } + + //! Destructor + ~rtm_rw_mutex() = default; + + //! Represents acquisition of a mutex. + class scoped_lock { + friend struct r1::rtm_rw_mutex_impl; + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) {} + + //! Acquire lock on given mutex. + scoped_lock(rtm_rw_mutex& m, bool write = true) : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) { + acquire(m, write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if(m_transaction_state != rtm_type::rtm_not_in_mutex) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + inline void acquire(rtm_rw_mutex& m, bool write = true); + + //! Try acquire lock on given mutex. + inline bool try_acquire(rtm_rw_mutex& m, bool write = true); + + //! 
Release lock + inline void release(); + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + inline bool upgrade_to_writer(); + + //! Downgrade writer to become a reader. + inline bool downgrade_to_reader(); + + private: + rtm_rw_mutex* m_mutex; + rtm_type m_transaction_state; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + +private: + alignas(speculation_granularity) std::atomic<bool> write_flag; +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) // 4324 warning +#endif + +} // namespace d1 + +namespace r1 { + //! Internal acquire write lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); + //! Internal acquire read lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); + //! Internal upgrade reader to become a writer. + bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&); + //! Internal downgrade writer to become a reader. + bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&); + //! Internal try_acquire write lock. + bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); + //! Internal try_acquire read lock. + bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); + //! Internal release lock. + void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&); +} + +namespace d1 { +//! Acquire lock on given mutex. +void rtm_rw_mutex::scoped_lock::acquire(rtm_rw_mutex& m, bool write) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + if (write) { + r1::acquire_writer(m, *this); + } else { + r1::acquire_reader(m, *this); + } +} + +//! Try acquire lock on given mutex. +bool rtm_rw_mutex::scoped_lock::try_acquire(rtm_rw_mutex& m, bool write) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + if (write) { + return r1::try_acquire_writer(m, *this); + } else { + return r1::try_acquire_reader(m, *this); + } +} + +//! Release lock +void rtm_rw_mutex::scoped_lock::release() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + __TBB_ASSERT(m_transaction_state != rtm_type::rtm_not_in_mutex, "lock is not acquired"); + return r1::release(*this); +} + +//! Upgrade reader to become a writer. +/** Returns whether the upgrade happened without releasing and re-acquiring the lock */ +bool rtm_rw_mutex::scoped_lock::upgrade_to_writer() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + if (m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer) { + return true; // Already a writer + } + return r1::upgrade(*this); +} + +//! Downgrade writer to become a reader. 
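
rtm_rw_mutex::scoped_lock above follows the same reader-writer protocol as the public mutexes (the class privately derives from spin_rw_mutex), so the acquire/upgrade dance can be illustrated with tbb::spin_rw_mutex, whose scoped_lock exposes the same interface. The cache in this sketch is an illustrative stand-in, not anything from the library:

    #include "oneapi/tbb/spin_rw_mutex.h"
    #include <map>
    #include <string>

    tbb::spin_rw_mutex cache_mutex;
    std::map<std::string, int> cache;   // guarded by cache_mutex

    // Read-mostly lookup that upgrades to a writer only when it has to insert.
    int lookup_or_insert(const std::string& key, int fallback) {
        tbb::spin_rw_mutex::scoped_lock lock(cache_mutex, /*write=*/false);   // reader
        auto it = cache.find(key);
        if (it != cache.end())
            return it->second;

        // upgrade_to_writer() returns false if the lock had to be released and
        // re-acquired; another thread may have inserted the key in that window.
        if (!lock.upgrade_to_writer()) {
            it = cache.find(key);
            if (it != cache.end())
                return it->second;
        }
        cache[key] = fallback;
        return fallback;
    }   // scoped_lock releases the mutex here
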
+bool rtm_rw_mutex::scoped_lock::downgrade_to_reader() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + if (m_transaction_state == rtm_type::rtm_transacting_reader || m_transaction_state == rtm_type::rtm_real_reader) { + return true; // Already a reader + } + return r1::downgrade(*this); +} + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(rtm_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(rtm_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_rw_mutex&, const wchar_t*) {} +#endif // WIN +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__rtm_rw_mutex_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h index 480ec8135e..a676203137 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h @@ -1,563 +1,563 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__segment_table_H -#define __TBB_detail__segment_table_H - -#include "_config.h" -#include "_allocator_traits.h" -#include "_template_helpers.h" -#include "_utils.h" -#include "_assert.h" -#include "_exception.h" -#include <atomic> -#include <type_traits> -#include <memory> -#include <cstring> - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(push) -#pragma warning(disable: 4127) // warning C4127: conditional expression is constant -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -template <typename T, typename Allocator, typename DerivedType, std::size_t PointersPerEmbeddedTable> -class segment_table { -public: - using value_type = T; - using segment_type = T*; - using atomic_segment = std::atomic<segment_type>; - using segment_table_type = atomic_segment*; - - using size_type = std::size_t; - using segment_index_type = std::size_t; - - using allocator_type = Allocator; - - using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; - using segment_table_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_segment>; -protected: - using segment_table_allocator_traits = tbb::detail::allocator_traits<segment_table_allocator_type>; - using derived_type = DerivedType; - - static constexpr size_type pointers_per_embedded_table = PointersPerEmbeddedTable; - static constexpr size_type pointers_per_long_table = sizeof(size_type) * 8; -public: - segment_table( const allocator_type& alloc = allocator_type() ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) - , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - } - - segment_table( const 
segment_table& other ) - : my_segment_table_allocator(segment_table_allocator_traits:: - select_on_container_copy_construction(other.my_segment_table_allocator)) - , my_segment_table(my_embedded_table), my_first_block{}, my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - try_call( [&] { - internal_transfer(other, copy_segment_body_type{*this}); - } ).on_exception( [&] { - clear(); - }); - } - - segment_table( const segment_table& other, const allocator_type& alloc ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) - , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - try_call( [&] { - internal_transfer(other, copy_segment_body_type{*this}); - } ).on_exception( [&] { - clear(); - }); - } - - segment_table( segment_table&& other ) - : my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(my_embedded_table) - , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - internal_move(std::move(other)); - } - - segment_table( segment_table&& other, const allocator_type& alloc ) - : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table), my_first_block{} - , my_size{}, my_segment_table_allocation_failed{} - { - zero_table(my_embedded_table, pointers_per_embedded_table); - using is_equal_type = typename segment_table_allocator_traits::is_always_equal; - internal_move_construct_with_allocator(std::move(other), alloc, is_equal_type()); - } - - ~segment_table() { - clear(); - } - - segment_table& operator=( const segment_table& other ) { - if (this != &other) { - copy_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); - internal_transfer(other, copy_segment_body_type{*this}); - } - return *this; - } - - segment_table& operator=( segment_table&& other ) - noexcept(derived_type::is_noexcept_assignment) - { - using pocma_type = typename segment_table_allocator_traits::propagate_on_container_move_assignment; - using is_equal_type = typename segment_table_allocator_traits::is_always_equal; - - if (this != &other) { - move_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); - internal_move_assign(std::move(other), tbb::detail::disjunction<is_equal_type, pocma_type>()); - } - return *this; - } - - void swap( segment_table& other ) - noexcept(derived_type::is_noexcept_swap) - { - using is_equal_type = typename segment_table_allocator_traits::is_always_equal; - using pocs_type = typename segment_table_allocator_traits::propagate_on_container_swap; - - if (this != &other) { - swap_allocators(my_segment_table_allocator, other.my_segment_table_allocator); - internal_swap(other, tbb::detail::disjunction<is_equal_type, pocs_type>()); - } - } - - segment_type get_segment( segment_index_type index ) const { - return get_table()[index] + segment_base(index); - } - - value_type& operator[]( size_type index ) { - return internal_subscript<true>(index); - } - - const value_type& operator[]( size_type index ) const { - return const_cast<segment_table*>(this)->internal_subscript<true>(index); - } - - const segment_table_allocator_type& get_allocator() const { - return my_segment_table_allocator; - } - - segment_table_allocator_type& get_allocator() { - return my_segment_table_allocator; - } - - void enable_segment( segment_type& segment, segment_table_type table, 
segment_index_type seg_index, size_type index ) { - // Allocate new segment - segment_type new_segment = self()->create_segment(table, seg_index, index); - if (new_segment != nullptr) { - // Store (new_segment - segment_base) into the segment table to allow access to the table by index via - // my_segment_table[segment_index_of(index)][index] - segment_type disabled_segment = nullptr; - if (!table[seg_index].compare_exchange_strong(disabled_segment, new_segment - segment_base(seg_index))) { - // compare_exchange failed => some other thread has already enabled this segment - // Deallocate the memory - self()->deallocate_segment(new_segment, seg_index); - } - } - - segment = table[seg_index].load(std::memory_order_acquire); - __TBB_ASSERT(segment != nullptr, "If create_segment returned nullptr, the element should be stored in the table"); - } - - void delete_segment( segment_index_type seg_index ) { - segment_type disabled_segment = nullptr; - // Set the pointer to the segment to NULL in the table - segment_type segment_to_delete = get_table()[seg_index].exchange(disabled_segment); - if (segment_to_delete == segment_allocation_failure_tag) { - return; - } - - segment_to_delete += segment_base(seg_index); - - // Deallocate the segment - self()->destroy_segment(segment_to_delete, seg_index); - } - - size_type number_of_segments( segment_table_type table ) const { - // Check for an active table, if it is embedded table - return the number of embedded segments - // Otherwise - return the maximum number of segments - return table == my_embedded_table ? pointers_per_embedded_table : pointers_per_long_table; - } - - size_type capacity() const noexcept { - segment_table_type table = get_table(); - size_type num_segments = number_of_segments(table); - for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { - // Check if the pointer is valid (allocated) - if (table[seg_index].load(std::memory_order_relaxed) <= segment_allocation_failure_tag) { - return segment_base(seg_index); - } - } - return segment_base(num_segments); - } - - size_type find_last_allocated_segment( segment_table_type table ) const noexcept { - size_type end = 0; - size_type num_segments = number_of_segments(table); - for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { - // Check if the pointer is valid (allocated) - if (table[seg_index].load(std::memory_order_relaxed) > segment_allocation_failure_tag) { - end = seg_index + 1; - } - } - return end; - } - - void reserve( size_type n ) { - if (n > allocator_traits_type::max_size(my_segment_table_allocator)) { - throw_exception(exception_id::reservation_length_error); - } - - size_type size = my_size.load(std::memory_order_relaxed); - segment_index_type start_seg_idx = size == 0 ? 
0 : segment_index_of(size - 1) + 1; - for (segment_index_type seg_idx = start_seg_idx; segment_base(seg_idx) < n; ++seg_idx) { - size_type first_index = segment_base(seg_idx); - internal_subscript<true>(first_index); - } - } - - void clear() { - clear_segments(); - clear_table(); - my_size.store(0, std::memory_order_relaxed); - my_first_block.store(0, std::memory_order_relaxed); - } - - void clear_segments() { - segment_table_type current_segment_table = get_table(); - for (size_type i = number_of_segments(current_segment_table); i != 0; --i) { - if (current_segment_table[i - 1].load(std::memory_order_relaxed) != nullptr) { - // If the segment was enabled - disable and deallocate it - delete_segment(i - 1); - } - } - } - - void clear_table() { - segment_table_type current_segment_table = get_table(); - if (current_segment_table != my_embedded_table) { - // If the active table is not the embedded one - deallocate the active table - for (size_type i = 0; i != pointers_per_long_table; ++i) { - segment_table_allocator_traits::destroy(my_segment_table_allocator, ¤t_segment_table[i]); - } - - segment_table_allocator_traits::deallocate(my_segment_table_allocator, current_segment_table, pointers_per_long_table); - my_segment_table.store(my_embedded_table, std::memory_order_relaxed); - zero_table(my_embedded_table, pointers_per_embedded_table); - } - } - - void extend_table_if_necessary(segment_table_type& table, size_type start_index, size_type end_index) { - // extend_segment_table if an active table is an embedded table - // and the requested index is not in the embedded table - if (table == my_embedded_table && end_index > embedded_table_size) { - if (start_index <= embedded_table_size) { - try_call([&] { - table = self()->allocate_long_table(my_embedded_table, start_index); - // It is possible that the table was extended by the thread that allocated first_block. - // In this case it is necessary to re-read the current table. - - if (table) { - my_segment_table.store(table, std::memory_order_release); - } else { - table = my_segment_table.load(std::memory_order_acquire); - } - }).on_exception([&] { - my_segment_table_allocation_failed.store(true, std::memory_order_relaxed); - }); - } else { - atomic_backoff backoff; - do { - if (my_segment_table_allocation_failed.load(std::memory_order_relaxed)) { - throw_exception(exception_id::bad_alloc); - } - backoff.pause(); - table = my_segment_table.load(std::memory_order_acquire); - } while (table == my_embedded_table); - } - } - } - - // Return the segment where index is stored - static constexpr segment_index_type segment_index_of( size_type index ) { - return size_type(tbb::detail::log2(uintptr_t(index|1))); - } - - // Needed to calculate the offset in segment - static constexpr size_type segment_base( size_type index ) { - return size_type(1) << index & ~size_type(1); - } - - // Return size of the segment - static constexpr size_type segment_size( size_type index ) { - return index == 0 ? 
2 : size_type(1) << index; - } - -private: - - derived_type* self() { - return static_cast<derived_type*>(this); - } - - struct copy_segment_body_type { - void operator()( segment_index_type index, segment_type from, segment_type to ) const { - my_instance.self()->copy_segment(index, from, to); - } - segment_table& my_instance; - }; - - struct move_segment_body_type { - void operator()( segment_index_type index, segment_type from, segment_type to ) const { - my_instance.self()->move_segment(index, from, to); - } - segment_table& my_instance; - }; - - // Transgers all segments from the other table - template <typename TransferBody> - void internal_transfer( const segment_table& other, TransferBody transfer_segment ) { - static_cast<derived_type*>(this)->destroy_elements(); - - assign_first_block_if_necessary(other.my_first_block.load(std::memory_order_relaxed)); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - - segment_table_type other_table = other.get_table(); - size_type end_segment_size = segment_size(other.find_last_allocated_segment(other_table)); - - // If an exception occurred in other, then the size may be greater than the size of the end segment. - size_type other_size = end_segment_size < other.my_size.load(std::memory_order_relaxed) ? - other.my_size.load(std::memory_order_relaxed) : end_segment_size; - other_size = my_segment_table_allocation_failed ? embedded_table_size : other_size; - - for (segment_index_type i = 0; segment_base(i) < other_size; ++i) { - // If the segment in other table is enabled - transfer it - if (other_table[i].load(std::memory_order_relaxed) == segment_allocation_failure_tag) - { - my_size = segment_base(i); - break; - } else if (other_table[i].load(std::memory_order_relaxed) != nullptr) { - internal_subscript<true>(segment_base(i)); - transfer_segment(i, other.get_table()[i].load(std::memory_order_relaxed) + segment_base(i), - get_table()[i].load(std::memory_order_relaxed) + segment_base(i)); - } - } - } - - // Moves the other segment table - // Only equal allocators are allowed - void internal_move( segment_table&& other ) { - // NOTE: allocators should be equal - clear(); - my_first_block.store(other.my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - // If an active table in other is embedded - restore all of the embedded segments - if (other.get_table() == other.my_embedded_table) { - for ( size_type i = 0; i != pointers_per_embedded_table; ++i ) { - segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); - my_embedded_table[i].store(other_segment, std::memory_order_relaxed); - other.my_embedded_table[i].store(nullptr, std::memory_order_relaxed); - } - my_segment_table.store(my_embedded_table, std::memory_order_relaxed); - } else { - my_segment_table.store(other.my_segment_table, std::memory_order_relaxed); - other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); - zero_table(other.my_embedded_table, pointers_per_embedded_table); - } - other.my_size.store(0, std::memory_order_relaxed); - } - - // Move construct the segment table with the allocator object - // if any instances of allocator_type are always equal - void internal_move_construct_with_allocator( segment_table&& other, const allocator_type&, - /*is_always_equal = */ std::true_type ) { - internal_move(std::move(other)); - } - - // Move construct the segment table with the 
allocator object - // if any instances of allocator_type are always equal - void internal_move_construct_with_allocator( segment_table&& other, const allocator_type& alloc, - /*is_always_equal = */ std::false_type ) { - if (other.my_segment_table_allocator == alloc) { - // If allocators are equal - restore pointers - internal_move(std::move(other)); - } else { - // If allocators are not equal - perform per element move with reallocation - try_call( [&] { - internal_transfer(other, move_segment_body_type{*this}); - } ).on_exception( [&] { - clear(); - }); - } - } - - // Move assigns the segment table to other is any instances of allocator_type are always equal - // or propagate_on_container_move_assignment is true - void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::true_type ) { - internal_move(std::move(other)); - } - - // Move assigns the segment table to other is any instances of allocator_type are not always equal - // and propagate_on_container_move_assignment is false - void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::false_type ) { - if (my_segment_table_allocator == other.my_segment_table_allocator) { - // If allocators are equal - restore pointers - internal_move(std::move(other)); - } else { - // If allocators are not equal - perform per element move with reallocation - internal_transfer(other, move_segment_body_type{*this}); - } - } - - // Swaps two segment tables if any instances of allocator_type are always equal - // or propagate_on_container_swap is true - void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::true_type ) { - internal_swap_fields(other); - } - - // Swaps two segment tables if any instances of allocator_type are not always equal - // and propagate_on_container_swap is false - // According to the C++ standard, swapping of two containers with unequal allocators - // is an undefined behavior scenario - void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::false_type ) { - __TBB_ASSERT(my_segment_table_allocator == other.my_segment_table_allocator, - "Swapping with unequal allocators is not allowed"); - internal_swap_fields(other); - } - - void internal_swap_fields( segment_table& other ) { - // If an active table in either *this segment table or other is an embedded one - swaps the embedded tables - if (get_table() == my_embedded_table || - other.get_table() == other.my_embedded_table) { - - for (size_type i = 0; i != pointers_per_embedded_table; ++i) { - segment_type current_segment = my_embedded_table[i].load(std::memory_order_relaxed); - segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); - - my_embedded_table[i].store(other_segment, std::memory_order_relaxed); - other.my_embedded_table[i].store(current_segment, std::memory_order_relaxed); - } - } - - segment_table_type current_segment_table = get_table(); - segment_table_type other_segment_table = other.get_table(); - - // If an active table is an embedded one - - // store an active table in other to the embedded one from other - if (current_segment_table == my_embedded_table) { - other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); - } else { - // Otherwise - store it to the active segment table - other.my_segment_table.store(current_segment_table, std::memory_order_relaxed); - } - - // If an active table in other segment table is an embedded one - - // store an active table in other to the embedded one from *this - if 
(other_segment_table == other.my_embedded_table) { - my_segment_table.store(my_embedded_table, std::memory_order_relaxed); - } else { - // Otherwise - store it to the active segment table in other - my_segment_table.store(other_segment_table, std::memory_order_relaxed); - } - auto first_block = other.my_first_block.load(std::memory_order_relaxed); - other.my_first_block.store(my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_first_block.store(first_block, std::memory_order_relaxed); - - auto size = other.my_size.load(std::memory_order_relaxed); - other.my_size.store(my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); - my_size.store(size, std::memory_order_relaxed); - } - -protected: - // A flag indicates that an exception was throws during segment allocations - const segment_type segment_allocation_failure_tag = reinterpret_cast<segment_type>(1); - static constexpr size_type embedded_table_size = segment_size(pointers_per_embedded_table); - - template <bool allow_out_of_range_access> - value_type& internal_subscript( size_type index ) { - segment_index_type seg_index = segment_index_of(index); - segment_table_type table = my_segment_table.load(std::memory_order_acquire); - segment_type segment = nullptr; - - if (allow_out_of_range_access) { - if (derived_type::allow_table_extending) { - extend_table_if_necessary(table, index, index + 1); - } - - segment = table[seg_index].load(std::memory_order_acquire); - // If the required segment is disabled - enable it - if (segment == nullptr) { - enable_segment(segment, table, seg_index, index); - } - // Check if an exception was thrown during segment allocation - if (segment == segment_allocation_failure_tag) { - throw_exception(exception_id::bad_alloc); - } - } else { - segment = table[seg_index].load(std::memory_order_acquire); - } - __TBB_ASSERT(segment != nullptr, nullptr); - - return segment[index]; - } - - void assign_first_block_if_necessary(segment_index_type index) { - size_type zero = 0; - if (this->my_first_block.load(std::memory_order_relaxed) == zero) { - this->my_first_block.compare_exchange_strong(zero, index); - } - } - - void zero_table( segment_table_type table, size_type count ) { - for (size_type i = 0; i != count; ++i) { - table[i].store(nullptr, std::memory_order_relaxed); - } - } - - segment_table_type get_table() const { - return my_segment_table.load(std::memory_order_acquire); - } - - segment_table_allocator_type my_segment_table_allocator; - std::atomic<segment_table_type> my_segment_table; - atomic_segment my_embedded_table[pointers_per_embedded_table]; - // Number of segments in first block - std::atomic<size_type> my_first_block; - // Number of elements in table - std::atomic<size_type> my_size; - // Flag to indicate failed extend table - std::atomic<bool> my_segment_table_allocation_failed; -}; // class segment_table - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#pragma warning(pop) // warning 4127 is back -#endif - -#endif // __TBB_detail__segment_table_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__segment_table_H +#define __TBB_detail__segment_table_H + +#include "_config.h" +#include "_allocator_traits.h" +#include "_template_helpers.h" +#include "_utils.h" +#include "_assert.h" +#include "_exception.h" +#include <atomic> +#include <type_traits> +#include <memory> +#include <cstring> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename T, typename Allocator, typename DerivedType, std::size_t PointersPerEmbeddedTable> +class segment_table { +public: + using value_type = T; + using segment_type = T*; + using atomic_segment = std::atomic<segment_type>; + using segment_table_type = atomic_segment*; + + using size_type = std::size_t; + using segment_index_type = std::size_t; + + using allocator_type = Allocator; + + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using segment_table_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_segment>; +protected: + using segment_table_allocator_traits = tbb::detail::allocator_traits<segment_table_allocator_type>; + using derived_type = DerivedType; + + static constexpr size_type pointers_per_embedded_table = PointersPerEmbeddedTable; + static constexpr size_type pointers_per_long_table = sizeof(size_type) * 8; +public: + segment_table( const allocator_type& alloc = allocator_type() ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + } + + segment_table( const segment_table& other ) + : my_segment_table_allocator(segment_table_allocator_traits:: + select_on_container_copy_construction(other.my_segment_table_allocator)) + , my_segment_table(my_embedded_table), my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + try_call( [&] { + internal_transfer(other, copy_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + + segment_table( const segment_table& other, const allocator_type& alloc ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + try_call( [&] { + internal_transfer(other, copy_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + + segment_table( segment_table&& other ) + : my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + internal_move(std::move(other)); + } + + segment_table( segment_table&& other, const allocator_type& alloc ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table), my_first_block{} + , 
my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + internal_move_construct_with_allocator(std::move(other), alloc, is_equal_type()); + } + + ~segment_table() { + clear(); + } + + segment_table& operator=( const segment_table& other ) { + if (this != &other) { + copy_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_transfer(other, copy_segment_body_type{*this}); + } + return *this; + } + + segment_table& operator=( segment_table&& other ) + noexcept(derived_type::is_noexcept_assignment) + { + using pocma_type = typename segment_table_allocator_traits::propagate_on_container_move_assignment; + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + + if (this != &other) { + move_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_move_assign(std::move(other), tbb::detail::disjunction<is_equal_type, pocma_type>()); + } + return *this; + } + + void swap( segment_table& other ) + noexcept(derived_type::is_noexcept_swap) + { + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + using pocs_type = typename segment_table_allocator_traits::propagate_on_container_swap; + + if (this != &other) { + swap_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_swap(other, tbb::detail::disjunction<is_equal_type, pocs_type>()); + } + } + + segment_type get_segment( segment_index_type index ) const { + return get_table()[index] + segment_base(index); + } + + value_type& operator[]( size_type index ) { + return internal_subscript<true>(index); + } + + const value_type& operator[]( size_type index ) const { + return const_cast<segment_table*>(this)->internal_subscript<true>(index); + } + + const segment_table_allocator_type& get_allocator() const { + return my_segment_table_allocator; + } + + segment_table_allocator_type& get_allocator() { + return my_segment_table_allocator; + } + + void enable_segment( segment_type& segment, segment_table_type table, segment_index_type seg_index, size_type index ) { + // Allocate new segment + segment_type new_segment = self()->create_segment(table, seg_index, index); + if (new_segment != nullptr) { + // Store (new_segment - segment_base) into the segment table to allow access to the table by index via + // my_segment_table[segment_index_of(index)][index] + segment_type disabled_segment = nullptr; + if (!table[seg_index].compare_exchange_strong(disabled_segment, new_segment - segment_base(seg_index))) { + // compare_exchange failed => some other thread has already enabled this segment + // Deallocate the memory + self()->deallocate_segment(new_segment, seg_index); + } + } + + segment = table[seg_index].load(std::memory_order_acquire); + __TBB_ASSERT(segment != nullptr, "If create_segment returned nullptr, the element should be stored in the table"); + } + + void delete_segment( segment_index_type seg_index ) { + segment_type disabled_segment = nullptr; + // Set the pointer to the segment to NULL in the table + segment_type segment_to_delete = get_table()[seg_index].exchange(disabled_segment); + if (segment_to_delete == segment_allocation_failure_tag) { + return; + } + + segment_to_delete += segment_base(seg_index); + + // Deallocate the segment + self()->destroy_segment(segment_to_delete, seg_index); + } + + size_type number_of_segments( segment_table_type table ) 
const { + // Check for an active table, if it is embedded table - return the number of embedded segments + // Otherwise - return the maximum number of segments + return table == my_embedded_table ? pointers_per_embedded_table : pointers_per_long_table; + } + + size_type capacity() const noexcept { + segment_table_type table = get_table(); + size_type num_segments = number_of_segments(table); + for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { + // Check if the pointer is valid (allocated) + if (table[seg_index].load(std::memory_order_relaxed) <= segment_allocation_failure_tag) { + return segment_base(seg_index); + } + } + return segment_base(num_segments); + } + + size_type find_last_allocated_segment( segment_table_type table ) const noexcept { + size_type end = 0; + size_type num_segments = number_of_segments(table); + for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { + // Check if the pointer is valid (allocated) + if (table[seg_index].load(std::memory_order_relaxed) > segment_allocation_failure_tag) { + end = seg_index + 1; + } + } + return end; + } + + void reserve( size_type n ) { + if (n > allocator_traits_type::max_size(my_segment_table_allocator)) { + throw_exception(exception_id::reservation_length_error); + } + + size_type size = my_size.load(std::memory_order_relaxed); + segment_index_type start_seg_idx = size == 0 ? 0 : segment_index_of(size - 1) + 1; + for (segment_index_type seg_idx = start_seg_idx; segment_base(seg_idx) < n; ++seg_idx) { + size_type first_index = segment_base(seg_idx); + internal_subscript<true>(first_index); + } + } + + void clear() { + clear_segments(); + clear_table(); + my_size.store(0, std::memory_order_relaxed); + my_first_block.store(0, std::memory_order_relaxed); + } + + void clear_segments() { + segment_table_type current_segment_table = get_table(); + for (size_type i = number_of_segments(current_segment_table); i != 0; --i) { + if (current_segment_table[i - 1].load(std::memory_order_relaxed) != nullptr) { + // If the segment was enabled - disable and deallocate it + delete_segment(i - 1); + } + } + } + + void clear_table() { + segment_table_type current_segment_table = get_table(); + if (current_segment_table != my_embedded_table) { + // If the active table is not the embedded one - deallocate the active table + for (size_type i = 0; i != pointers_per_long_table; ++i) { + segment_table_allocator_traits::destroy(my_segment_table_allocator, ¤t_segment_table[i]); + } + + segment_table_allocator_traits::deallocate(my_segment_table_allocator, current_segment_table, pointers_per_long_table); + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + zero_table(my_embedded_table, pointers_per_embedded_table); + } + } + + void extend_table_if_necessary(segment_table_type& table, size_type start_index, size_type end_index) { + // extend_segment_table if an active table is an embedded table + // and the requested index is not in the embedded table + if (table == my_embedded_table && end_index > embedded_table_size) { + if (start_index <= embedded_table_size) { + try_call([&] { + table = self()->allocate_long_table(my_embedded_table, start_index); + // It is possible that the table was extended by the thread that allocated first_block. + // In this case it is necessary to re-read the current table. 
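// A minimal standalone sketch (not from the TBB sources) of the power-of-two segment layout that
// the helpers restored a few lines below (segment_index_of, segment_base, segment_size) implement:
// indices 0..1 share segment 0, and every later segment s covers indices [2^s, 2^(s+1)).
// The names seg_index_of/seg_base/seg_size are local stand-ins written against <cstddef> only.

#include <cassert>
#include <cstddef>

std::size_t seg_index_of(std::size_t i) {   // floor(log2(i | 1)), mirroring tbb::detail::log2(uintptr_t(index|1))
    std::size_t v = i | 1, r = 0;
    while (v >>= 1) ++r;
    return r;
}
std::size_t seg_base(std::size_t s) { return (std::size_t(1) << s) & ~std::size_t(1); }
std::size_t seg_size(std::size_t s) { return s == 0 ? 2 : std::size_t(1) << s; }

int main() {
    assert(seg_index_of(0) == 0 && seg_index_of(1) == 0);                  // segment 0: indices 0..1
    assert(seg_index_of(3) == 1 && seg_base(1) == 2 && seg_size(1) == 2);  // segment 1: indices 2..3
    assert(seg_index_of(5) == 2 && seg_base(2) == 4 && seg_size(2) == 4);  // segment 2: indices 4..7
    // enable_segment() stores (segment - segment_base(s)), so table[s][index] addresses the
    // element directly without subtracting the base on every access.
    return 0;
}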
+ + if (table) { + my_segment_table.store(table, std::memory_order_release); + } else { + table = my_segment_table.load(std::memory_order_acquire); + } + }).on_exception([&] { + my_segment_table_allocation_failed.store(true, std::memory_order_relaxed); + }); + } else { + atomic_backoff backoff; + do { + if (my_segment_table_allocation_failed.load(std::memory_order_relaxed)) { + throw_exception(exception_id::bad_alloc); + } + backoff.pause(); + table = my_segment_table.load(std::memory_order_acquire); + } while (table == my_embedded_table); + } + } + } + + // Return the segment where index is stored + static constexpr segment_index_type segment_index_of( size_type index ) { + return size_type(tbb::detail::log2(uintptr_t(index|1))); + } + + // Needed to calculate the offset in segment + static constexpr size_type segment_base( size_type index ) { + return size_type(1) << index & ~size_type(1); + } + + // Return size of the segment + static constexpr size_type segment_size( size_type index ) { + return index == 0 ? 2 : size_type(1) << index; + } + +private: + + derived_type* self() { + return static_cast<derived_type*>(this); + } + + struct copy_segment_body_type { + void operator()( segment_index_type index, segment_type from, segment_type to ) const { + my_instance.self()->copy_segment(index, from, to); + } + segment_table& my_instance; + }; + + struct move_segment_body_type { + void operator()( segment_index_type index, segment_type from, segment_type to ) const { + my_instance.self()->move_segment(index, from, to); + } + segment_table& my_instance; + }; + + // Transgers all segments from the other table + template <typename TransferBody> + void internal_transfer( const segment_table& other, TransferBody transfer_segment ) { + static_cast<derived_type*>(this)->destroy_elements(); + + assign_first_block_if_necessary(other.my_first_block.load(std::memory_order_relaxed)); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + + segment_table_type other_table = other.get_table(); + size_type end_segment_size = segment_size(other.find_last_allocated_segment(other_table)); + + // If an exception occurred in other, then the size may be greater than the size of the end segment. + size_type other_size = end_segment_size < other.my_size.load(std::memory_order_relaxed) ? + other.my_size.load(std::memory_order_relaxed) : end_segment_size; + other_size = my_segment_table_allocation_failed ? 
embedded_table_size : other_size; + + for (segment_index_type i = 0; segment_base(i) < other_size; ++i) { + // If the segment in other table is enabled - transfer it + if (other_table[i].load(std::memory_order_relaxed) == segment_allocation_failure_tag) + { + my_size = segment_base(i); + break; + } else if (other_table[i].load(std::memory_order_relaxed) != nullptr) { + internal_subscript<true>(segment_base(i)); + transfer_segment(i, other.get_table()[i].load(std::memory_order_relaxed) + segment_base(i), + get_table()[i].load(std::memory_order_relaxed) + segment_base(i)); + } + } + } + + // Moves the other segment table + // Only equal allocators are allowed + void internal_move( segment_table&& other ) { + // NOTE: allocators should be equal + clear(); + my_first_block.store(other.my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + // If an active table in other is embedded - restore all of the embedded segments + if (other.get_table() == other.my_embedded_table) { + for ( size_type i = 0; i != pointers_per_embedded_table; ++i ) { + segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); + my_embedded_table[i].store(other_segment, std::memory_order_relaxed); + other.my_embedded_table[i].store(nullptr, std::memory_order_relaxed); + } + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + } else { + my_segment_table.store(other.my_segment_table, std::memory_order_relaxed); + other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); + zero_table(other.my_embedded_table, pointers_per_embedded_table); + } + other.my_size.store(0, std::memory_order_relaxed); + } + + // Move construct the segment table with the allocator object + // if any instances of allocator_type are always equal + void internal_move_construct_with_allocator( segment_table&& other, const allocator_type&, + /*is_always_equal = */ std::true_type ) { + internal_move(std::move(other)); + } + + // Move construct the segment table with the allocator object + // if any instances of allocator_type are always equal + void internal_move_construct_with_allocator( segment_table&& other, const allocator_type& alloc, + /*is_always_equal = */ std::false_type ) { + if (other.my_segment_table_allocator == alloc) { + // If allocators are equal - restore pointers + internal_move(std::move(other)); + } else { + // If allocators are not equal - perform per element move with reallocation + try_call( [&] { + internal_transfer(other, move_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + } + + // Move assigns the segment table to other is any instances of allocator_type are always equal + // or propagate_on_container_move_assignment is true + void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::true_type ) { + internal_move(std::move(other)); + } + + // Move assigns the segment table to other is any instances of allocator_type are not always equal + // and propagate_on_container_move_assignment is false + void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::false_type ) { + if (my_segment_table_allocator == other.my_segment_table_allocator) { + // If allocators are equal - restore pointers + internal_move(std::move(other)); + } else { + // If allocators are not equal - perform per element move with reallocation + internal_transfer(other, 
move_segment_body_type{*this}); + } + } + + // Swaps two segment tables if any instances of allocator_type are always equal + // or propagate_on_container_swap is true + void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::true_type ) { + internal_swap_fields(other); + } + + // Swaps two segment tables if any instances of allocator_type are not always equal + // and propagate_on_container_swap is false + // According to the C++ standard, swapping of two containers with unequal allocators + // is an undefined behavior scenario + void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::false_type ) { + __TBB_ASSERT(my_segment_table_allocator == other.my_segment_table_allocator, + "Swapping with unequal allocators is not allowed"); + internal_swap_fields(other); + } + + void internal_swap_fields( segment_table& other ) { + // If an active table in either *this segment table or other is an embedded one - swaps the embedded tables + if (get_table() == my_embedded_table || + other.get_table() == other.my_embedded_table) { + + for (size_type i = 0; i != pointers_per_embedded_table; ++i) { + segment_type current_segment = my_embedded_table[i].load(std::memory_order_relaxed); + segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); + + my_embedded_table[i].store(other_segment, std::memory_order_relaxed); + other.my_embedded_table[i].store(current_segment, std::memory_order_relaxed); + } + } + + segment_table_type current_segment_table = get_table(); + segment_table_type other_segment_table = other.get_table(); + + // If an active table is an embedded one - + // store an active table in other to the embedded one from other + if (current_segment_table == my_embedded_table) { + other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); + } else { + // Otherwise - store it to the active segment table + other.my_segment_table.store(current_segment_table, std::memory_order_relaxed); + } + + // If an active table in other segment table is an embedded one - + // store an active table in other to the embedded one from *this + if (other_segment_table == other.my_embedded_table) { + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + } else { + // Otherwise - store it to the active segment table in other + my_segment_table.store(other_segment_table, std::memory_order_relaxed); + } + auto first_block = other.my_first_block.load(std::memory_order_relaxed); + other.my_first_block.store(my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_first_block.store(first_block, std::memory_order_relaxed); + + auto size = other.my_size.load(std::memory_order_relaxed); + other.my_size.store(my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_size.store(size, std::memory_order_relaxed); + } + +protected: + // A flag indicates that an exception was throws during segment allocations + const segment_type segment_allocation_failure_tag = reinterpret_cast<segment_type>(1); + static constexpr size_type embedded_table_size = segment_size(pointers_per_embedded_table); + + template <bool allow_out_of_range_access> + value_type& internal_subscript( size_type index ) { + segment_index_type seg_index = segment_index_of(index); + segment_table_type table = my_segment_table.load(std::memory_order_acquire); + segment_type segment = nullptr; + + if (allow_out_of_range_access) { + if (derived_type::allow_table_extending) { + extend_table_if_necessary(table, index, index + 
1); + } + + segment = table[seg_index].load(std::memory_order_acquire); + // If the required segment is disabled - enable it + if (segment == nullptr) { + enable_segment(segment, table, seg_index, index); + } + // Check if an exception was thrown during segment allocation + if (segment == segment_allocation_failure_tag) { + throw_exception(exception_id::bad_alloc); + } + } else { + segment = table[seg_index].load(std::memory_order_acquire); + } + __TBB_ASSERT(segment != nullptr, nullptr); + + return segment[index]; + } + + void assign_first_block_if_necessary(segment_index_type index) { + size_type zero = 0; + if (this->my_first_block.load(std::memory_order_relaxed) == zero) { + this->my_first_block.compare_exchange_strong(zero, index); + } + } + + void zero_table( segment_table_type table, size_type count ) { + for (size_type i = 0; i != count; ++i) { + table[i].store(nullptr, std::memory_order_relaxed); + } + } + + segment_table_type get_table() const { + return my_segment_table.load(std::memory_order_acquire); + } + + segment_table_allocator_type my_segment_table_allocator; + std::atomic<segment_table_type> my_segment_table; + atomic_segment my_embedded_table[pointers_per_embedded_table]; + // Number of segments in first block + std::atomic<size_type> my_first_block; + // Number of elements in table + std::atomic<size_type> my_size; + // Flag to indicate failed extend table + std::atomic<bool> my_segment_table_allocation_failed; +}; // class segment_table + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +#endif // __TBB_detail__segment_table_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h index 8a10a61e1a..d7c6258f4b 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h @@ -1,108 +1,108 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB__small_object_pool_H -#define __TBB__small_object_pool_H - -#include "_config.h" -#include "_assert.h" - -#include "../profiling.h" -#include <cstddef> -#include <cstdint> -#include <atomic> - -namespace tbb { -namespace detail { - -namespace d1 { -class small_object_pool { -protected: - small_object_pool() = default; -}; -struct execution_data; -} - -namespace r1 { -void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes, - const d1::execution_data& ed); -void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes); -void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes, - const d1::execution_data& ed); -void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes); -} - -namespace d1 { -class small_object_allocator { -public: - template <typename Type, typename... Args> - Type* new_object(execution_data& ed, Args&&... args) { - void* allocated_object = r1::allocate(m_pool, sizeof(Type), ed); - - auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); - return constructed_object; - } - - template <typename Type, typename... Args> - Type* new_object(Args&&... args) { - void* allocated_object = r1::allocate(m_pool, sizeof(Type)); - - auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); - return constructed_object; - } - - template <typename Type> - void delete_object(Type* object, const execution_data& ed) { - // Copy this since the it can be the member of the passed object and - // unintentionally destroyed when Type destructor is called below - small_object_allocator alloc = *this; - object->~Type(); - alloc.deallocate(object, ed); - } - - template <typename Type> - void delete_object(Type* object) { - // Copy this since the it can be the member of the passed object and - // unintentionally destroyed when Type destructor is called below - small_object_allocator alloc = *this; - object->~Type(); - alloc.deallocate(object); - } - - template <typename Type> - void deallocate(Type* ptr, const execution_data& ed) { - call_itt_task_notify(destroy, ptr); - - __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); - r1::deallocate(*m_pool, ptr, sizeof(Type), ed); - } - - template <typename Type> - void deallocate(Type* ptr) { - call_itt_task_notify(destroy, ptr); - - __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); - r1::deallocate(*m_pool, ptr, sizeof(Type)); - } -private: - small_object_pool* m_pool{}; -}; - -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif /* __TBB__small_object_pool_H */ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB__small_object_pool_H +#define __TBB__small_object_pool_H + +#include "_config.h" +#include "_assert.h" + +#include "../profiling.h" +#include <cstddef> +#include <cstdint> +#include <atomic> + +namespace tbb { +namespace detail { + +namespace d1 { +class small_object_pool { +protected: + small_object_pool() = default; +}; +struct execution_data; +} + +namespace r1 { +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes, + const d1::execution_data& ed); +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes); +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes, + const d1::execution_data& ed); +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes); +} + +namespace d1 { +class small_object_allocator { +public: + template <typename Type, typename... Args> + Type* new_object(execution_data& ed, Args&&... args) { + void* allocated_object = r1::allocate(m_pool, sizeof(Type), ed); + + auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); + return constructed_object; + } + + template <typename Type, typename... Args> + Type* new_object(Args&&... args) { + void* allocated_object = r1::allocate(m_pool, sizeof(Type)); + + auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); + return constructed_object; + } + + template <typename Type> + void delete_object(Type* object, const execution_data& ed) { + // Copy this since the it can be the member of the passed object and + // unintentionally destroyed when Type destructor is called below + small_object_allocator alloc = *this; + object->~Type(); + alloc.deallocate(object, ed); + } + + template <typename Type> + void delete_object(Type* object) { + // Copy this since the it can be the member of the passed object and + // unintentionally destroyed when Type destructor is called below + small_object_allocator alloc = *this; + object->~Type(); + alloc.deallocate(object); + } + + template <typename Type> + void deallocate(Type* ptr, const execution_data& ed) { + call_itt_task_notify(destroy, ptr); + + __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); + r1::deallocate(*m_pool, ptr, sizeof(Type), ed); + } + + template <typename Type> + void deallocate(Type* ptr) { + call_itt_task_notify(destroy, ptr); + + __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); + r1::deallocate(*m_pool, ptr, sizeof(Type)); + } +private: + small_object_pool* m_pool{}; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__small_object_pool_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h index c06d5b5db0..a295f48ddb 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h @@ -1,78 +1,78 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm") -TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for") -TBB_STRING_RESOURCE(PARALLEL_FOR_EACH, "tbb_parallel_for_each") -TBB_STRING_RESOURCE(PARALLEL_INVOKE, "tbb_parallel_invoke") -TBB_STRING_RESOURCE(PARALLEL_REDUCE, "tbb_parallel_reduce") -TBB_STRING_RESOURCE(PARALLEL_SCAN, "tbb_parallel_scan") -TBB_STRING_RESOURCE(PARALLEL_SORT, "tbb_parallel_sort") -TBB_STRING_RESOURCE(PARALLEL_PIPELINE, "tbb_parallel_pipeline") -TBB_STRING_RESOURCE(CUSTOM_CTX, "tbb_custom") - -TBB_STRING_RESOURCE(FLOW_NULL, "null") -TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node") -TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node") -TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node") -TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)") -TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)") -TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node") -TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node") -TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node") -TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node") -TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node") -TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node") -TBB_STRING_RESOURCE(FLOW_INPUT_NODE, "input_node") -TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node") -TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node") -TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node") -TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node") -TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8") -TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8") -TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9") -TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name") -TBB_STRING_RESOURCE(FLOW_BODY, "body") -TBB_STRING_RESOURCE(FLOW_GRAPH, "graph") -TBB_STRING_RESOURCE(FLOW_NODE, "node") -TBB_STRING_RESOURCE(FLOW_TASKS, "tbb_flow_graph") -TBB_STRING_RESOURCE(USER_EVENT, "user_event") - -#if __TBB_FLOW_TRACE_CODEPTR -TBB_STRING_RESOURCE(CODE_ADDRESS, "code_address") -#endif +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may 
not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm") +TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for") +TBB_STRING_RESOURCE(PARALLEL_FOR_EACH, "tbb_parallel_for_each") +TBB_STRING_RESOURCE(PARALLEL_INVOKE, "tbb_parallel_invoke") +TBB_STRING_RESOURCE(PARALLEL_REDUCE, "tbb_parallel_reduce") +TBB_STRING_RESOURCE(PARALLEL_SCAN, "tbb_parallel_scan") +TBB_STRING_RESOURCE(PARALLEL_SORT, "tbb_parallel_sort") +TBB_STRING_RESOURCE(PARALLEL_PIPELINE, "tbb_parallel_pipeline") +TBB_STRING_RESOURCE(CUSTOM_CTX, "tbb_custom") + +TBB_STRING_RESOURCE(FLOW_NULL, "null") +TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node") +TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node") +TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node") +TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)") +TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node") +TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node") +TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node") +TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node") +TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node") +TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node") +TBB_STRING_RESOURCE(FLOW_INPUT_NODE, "input_node") +TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node") +TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node") +TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node") +TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node") +TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9") +TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name") +TBB_STRING_RESOURCE(FLOW_BODY, "body") +TBB_STRING_RESOURCE(FLOW_GRAPH, 
"graph") +TBB_STRING_RESOURCE(FLOW_NODE, "node") +TBB_STRING_RESOURCE(FLOW_TASKS, "tbb_flow_graph") +TBB_STRING_RESOURCE(USER_EVENT, "user_event") + +#if __TBB_FLOW_TRACE_CODEPTR +TBB_STRING_RESOURCE(CODE_ADDRESS, "code_address") +#endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h index 7b4f8521c6..32a2d9c8e8 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h @@ -1,243 +1,243 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB__task_H -#define __TBB__task_H - -#include "_config.h" -#include "_assert.h" -#include "_template_helpers.h" -#include "_small_object_pool.h" - -#include "../profiling.h" - -#include <cstddef> -#include <cstdint> -#include <climits> -#include <utility> -#include <atomic> -#include <mutex> - -namespace tbb { -namespace detail { - -namespace d1 { -using slot_id = unsigned short; -constexpr slot_id no_slot = slot_id(~0); -constexpr slot_id any_slot = slot_id(~1); - -class task; -class wait_context; -class task_group_context; -struct execution_data; -} - -namespace r1 { -//! Task spawn/wait entry points -void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx); -void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id); -void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); -void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); -d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); -d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); - -// Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms. -struct suspend_point_type; -using suspend_callback_type = void(*)(void*, suspend_point_type*); -//! The resumable tasks entry points -void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); -void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); -suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); -void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr); - -class thread_data; -class task_dispatcher; -class external_waiter; -struct task_accessor; -struct task_arena_impl; -} // namespace r1 - -namespace d1 { - -class task_arena; -using suspend_point = r1::suspend_point_type*; - -#if __TBB_RESUMABLE_TASKS -template <typename F> -static void suspend_callback(void* user_callback, suspend_point sp) { - // Copy user function to a new stack after the context switch to avoid a race when the previous - // suspend point is resumed while the user_callback is being called. 
- F user_callback_copy = *static_cast<F*>(user_callback); - user_callback_copy(sp); -} - -template <typename F> -void suspend(F f) { - r1::suspend(&suspend_callback<F>, &f); -} - -inline void resume(suspend_point tag) { - r1::resume(tag); -} -#endif /* __TBB_RESUMABLE_TASKS */ - -// TODO align wait_context on cache lane -class wait_context { - static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1); - - std::uint64_t m_version_and_traits{1}; - std::atomic<std::uint64_t> m_ref_count{}; - - void add_reference(std::int64_t delta) { - call_itt_task_notify(releasing, this); - std::uint64_t r = m_ref_count.fetch_add(delta) + delta; - - __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); - - if (!r) { - // Some external waiters or coroutine waiters sleep in wait list - // Should to notify them that work is done - std::uintptr_t wait_ctx_addr = std::uintptr_t(this); - r1::notify_waiters(wait_ctx_addr); - } - } - - bool continue_execution() const { - std::uint64_t r = m_ref_count.load(std::memory_order_acquire); - __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); - return r > 0; - } - - friend class r1::thread_data; - friend class r1::task_dispatcher; - friend class r1::external_waiter; - friend class task_group; - friend class task_group_base; - friend struct r1::task_arena_impl; - friend struct r1::suspend_point_type; -public: - // Despite the internal reference count is uin64_t we limit the user interface with uint32_t - // to preserve a part of the internal reference count for special needs. - wait_context(std::uint32_t ref_count) : m_ref_count{ref_count} { suppress_unused_warning(m_version_and_traits); } - wait_context(const wait_context&) = delete; - - ~wait_context() { - __TBB_ASSERT(!continue_execution(), NULL); - } - - void reserve(std::uint32_t delta = 1) { - add_reference(delta); - } - - void release(std::uint32_t delta = 1) { - add_reference(-std::int64_t(delta)); - } -#if __TBB_EXTRA_DEBUG - unsigned reference_count() const { - return unsigned(m_ref_count.load(std::memory_order_acquire)); - } -#endif -}; - -struct execution_data { - task_group_context* context{}; - slot_id original_slot{}; - slot_id affinity_slot{}; -}; - -inline task_group_context* context(const execution_data& ed) { - return ed.context; -} - -inline slot_id original_slot(const execution_data& ed) { - return ed.original_slot; -} - -inline slot_id affinity_slot(const execution_data& ed) { - return ed.affinity_slot; -} - -inline slot_id execution_slot(const execution_data& ed) { - return r1::execution_slot(&ed); -} - -inline bool is_same_affinity(const execution_data& ed) { - return affinity_slot(ed) == no_slot || affinity_slot(ed) == execution_slot(ed); -} - -inline bool is_stolen(const execution_data& ed) { - return original_slot(ed) != execution_slot(ed); -} - -inline void spawn(task& t, task_group_context& ctx) { - call_itt_task_notify(releasing, &t); - r1::spawn(t, ctx); -} - -inline void spawn(task& t, task_group_context& ctx, slot_id id) { - call_itt_task_notify(releasing, &t); - r1::spawn(t, ctx, id); -} - -inline void execute_and_wait(task& t, task_group_context& t_ctx, wait_context& wait_ctx, task_group_context& w_ctx) { - r1::execute_and_wait(t, t_ctx, wait_ctx, w_ctx); - call_itt_task_notify(acquired, &wait_ctx); - call_itt_task_notify(destroy, &wait_ctx); -} - -inline void wait(wait_context& wait_ctx, task_group_context& ctx) { - r1::wait(wait_ctx, ctx); - call_itt_task_notify(acquired, &wait_ctx); - call_itt_task_notify(destroy, &wait_ctx); -} - -using 
r1::current_context; - -class task_traits { - std::uint64_t m_version_and_traits{}; - friend struct r1::task_accessor; -}; - -//! Alignment for a task object -static constexpr std::size_t task_alignment = 64; - -//! Base class for user-defined tasks. -/** @ingroup task_scheduling */ - -class -#if __TBB_ALIGNAS_AVAILABLE -alignas(task_alignment) -#endif -task : public task_traits { -protected: - virtual ~task() = default; - -public: - virtual task* execute(execution_data&) = 0; - virtual task* cancel(execution_data&) = 0; - -private: - std::uint64_t m_reserved[6]{}; - friend struct r1::task_accessor; -}; -#if __TBB_ALIGNAS_AVAILABLE -static_assert(sizeof(task) == task_alignment, "task size is broken"); -#endif -} // namespace d1 -} // namespace detail -} // namespace tbb - -#endif /* __TBB__task_H */ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__task_H +#define __TBB__task_H + +#include "_config.h" +#include "_assert.h" +#include "_template_helpers.h" +#include "_small_object_pool.h" + +#include "../profiling.h" + +#include <cstddef> +#include <cstdint> +#include <climits> +#include <utility> +#include <atomic> +#include <mutex> + +namespace tbb { +namespace detail { + +namespace d1 { +using slot_id = unsigned short; +constexpr slot_id no_slot = slot_id(~0); +constexpr slot_id any_slot = slot_id(~1); + +class task; +class wait_context; +class task_group_context; +struct execution_data; +} + +namespace r1 { +//! Task spawn/wait entry points +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx); +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id); +void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); +void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); +d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); +d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); + +// Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms. +struct suspend_point_type; +using suspend_callback_type = void(*)(void*, suspend_point_type*); +//! 
The resumable tasks entry points +void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); +void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); +suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); +void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr); + +class thread_data; +class task_dispatcher; +class external_waiter; +struct task_accessor; +struct task_arena_impl; +} // namespace r1 + +namespace d1 { + +class task_arena; +using suspend_point = r1::suspend_point_type*; + +#if __TBB_RESUMABLE_TASKS +template <typename F> +static void suspend_callback(void* user_callback, suspend_point sp) { + // Copy user function to a new stack after the context switch to avoid a race when the previous + // suspend point is resumed while the user_callback is being called. + F user_callback_copy = *static_cast<F*>(user_callback); + user_callback_copy(sp); +} + +template <typename F> +void suspend(F f) { + r1::suspend(&suspend_callback<F>, &f); +} + +inline void resume(suspend_point tag) { + r1::resume(tag); +} +#endif /* __TBB_RESUMABLE_TASKS */ + +// TODO align wait_context on cache lane +class wait_context { + static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1); + + std::uint64_t m_version_and_traits{1}; + std::atomic<std::uint64_t> m_ref_count{}; + + void add_reference(std::int64_t delta) { + call_itt_task_notify(releasing, this); + std::uint64_t r = m_ref_count.fetch_add(delta) + delta; + + __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); + + if (!r) { + // Some external waiters or coroutine waiters sleep in wait list + // Should to notify them that work is done + std::uintptr_t wait_ctx_addr = std::uintptr_t(this); + r1::notify_waiters(wait_ctx_addr); + } + } + + bool continue_execution() const { + std::uint64_t r = m_ref_count.load(std::memory_order_acquire); + __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); + return r > 0; + } + + friend class r1::thread_data; + friend class r1::task_dispatcher; + friend class r1::external_waiter; + friend class task_group; + friend class task_group_base; + friend struct r1::task_arena_impl; + friend struct r1::suspend_point_type; +public: + // Despite the internal reference count is uin64_t we limit the user interface with uint32_t + // to preserve a part of the internal reference count for special needs. 
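// A hedged sketch (not from the TBB sources) of how this reference count gates a wait, pieced
// together from the declarations in this header: the waiter constructs wait_context with one
// reference per outstanding task, every task drops its reference when it is done, and d1::wait()
// returns once the count reaches zero (add_reference() calls r1::notify_waiters() on that transition).
// The task_group_context is assumed to come from the public headers; the supported way to get this
// pattern is tbb::task_group, so treat the code below as illustration only.
struct counted_task : tbb::detail::d1::task {
    tbb::detail::d1::wait_context& m_wait;
    explicit counted_task(tbb::detail::d1::wait_context& w) : m_wait(w) {}
    tbb::detail::d1::task* execute(tbb::detail::d1::execution_data&) override {
        // ... user work goes here ...
        m_wait.release();                    // drop the reference reserved for this task
        return nullptr;
    }
    tbb::detail::d1::task* cancel(tbb::detail::d1::execution_data&) override {
        m_wait.release();                    // a cancelled task still has to release its reference
        return nullptr;
    }
};

void run_two_and_wait(tbb::detail::d1::task_group_context& ctx) {
    tbb::detail::d1::wait_context wait_ctx{2};   // two outstanding tasks
    counted_task a{wait_ctx}, b{wait_ctx};
    tbb::detail::d1::spawn(a, ctx);
    tbb::detail::d1::spawn(b, ctx);
    tbb::detail::d1::wait(wait_ctx, ctx);        // returns only after both tasks call release()
}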
+ wait_context(std::uint32_t ref_count) : m_ref_count{ref_count} { suppress_unused_warning(m_version_and_traits); } + wait_context(const wait_context&) = delete; + + ~wait_context() { + __TBB_ASSERT(!continue_execution(), NULL); + } + + void reserve(std::uint32_t delta = 1) { + add_reference(delta); + } + + void release(std::uint32_t delta = 1) { + add_reference(-std::int64_t(delta)); + } +#if __TBB_EXTRA_DEBUG + unsigned reference_count() const { + return unsigned(m_ref_count.load(std::memory_order_acquire)); + } +#endif +}; + +struct execution_data { + task_group_context* context{}; + slot_id original_slot{}; + slot_id affinity_slot{}; +}; + +inline task_group_context* context(const execution_data& ed) { + return ed.context; +} + +inline slot_id original_slot(const execution_data& ed) { + return ed.original_slot; +} + +inline slot_id affinity_slot(const execution_data& ed) { + return ed.affinity_slot; +} + +inline slot_id execution_slot(const execution_data& ed) { + return r1::execution_slot(&ed); +} + +inline bool is_same_affinity(const execution_data& ed) { + return affinity_slot(ed) == no_slot || affinity_slot(ed) == execution_slot(ed); +} + +inline bool is_stolen(const execution_data& ed) { + return original_slot(ed) != execution_slot(ed); +} + +inline void spawn(task& t, task_group_context& ctx) { + call_itt_task_notify(releasing, &t); + r1::spawn(t, ctx); +} + +inline void spawn(task& t, task_group_context& ctx, slot_id id) { + call_itt_task_notify(releasing, &t); + r1::spawn(t, ctx, id); +} + +inline void execute_and_wait(task& t, task_group_context& t_ctx, wait_context& wait_ctx, task_group_context& w_ctx) { + r1::execute_and_wait(t, t_ctx, wait_ctx, w_ctx); + call_itt_task_notify(acquired, &wait_ctx); + call_itt_task_notify(destroy, &wait_ctx); +} + +inline void wait(wait_context& wait_ctx, task_group_context& ctx) { + r1::wait(wait_ctx, ctx); + call_itt_task_notify(acquired, &wait_ctx); + call_itt_task_notify(destroy, &wait_ctx); +} + +using r1::current_context; + +class task_traits { + std::uint64_t m_version_and_traits{}; + friend struct r1::task_accessor; +}; + +//! Alignment for a task object +static constexpr std::size_t task_alignment = 64; + +//! Base class for user-defined tasks. +/** @ingroup task_scheduling */ + +class +#if __TBB_ALIGNAS_AVAILABLE +alignas(task_alignment) +#endif +task : public task_traits { +protected: + virtual ~task() = default; + +public: + virtual task* execute(execution_data&) = 0; + virtual task* cancel(execution_data&) = 0; + +private: + std::uint64_t m_reserved[6]{}; + friend struct r1::task_accessor; +}; +#if __TBB_ALIGNAS_AVAILABLE +static_assert(sizeof(task) == task_alignment, "task size is broken"); +#endif +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__task_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h index 45a8ffede6..e973bc3128 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h @@ -1,394 +1,394 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__template_helpers_H -#define __TBB_detail__template_helpers_H - -#include "_utils.h" -#include "_config.h" - -#include <cstddef> -#include <cstdint> - -#include <type_traits> -#include <memory> -#include <iterator> - -namespace tbb { -namespace detail { -inline namespace d0 { - -// An internal implementation of void_t, which can be used in SFINAE contexts -template <typename...> -struct void_impl { - using type = void; -}; // struct void_impl - -template <typename... Args> -using void_t = typename void_impl<Args...>::type; - -// Generic SFINAE helper for expression checks, based on the idea demonstrated in ISO C++ paper n4502 -template <typename T, typename, template <typename> class... Checks> -struct supports_impl { - using type = std::false_type; -}; - -template <typename T, template <typename> class... Checks> -struct supports_impl<T, void_t<Checks<T>...>, Checks...> { - using type = std::true_type; -}; - -template <typename T, template <typename> class... Checks> -using supports = typename supports_impl<T, void, Checks...>::type; - -//! A template to select either 32-bit or 64-bit constant as compile time, depending on machine word size. -template <unsigned u, unsigned long long ull > -struct select_size_t_constant { - // Explicit cast is needed to avoid compiler warnings about possible truncation. - // The value of the right size, which is selected by ?:, is anyway not truncated or promoted. - static const std::size_t value = (std::size_t)((sizeof(std::size_t)==sizeof(u)) ? u : ull); -}; - -// TODO: do we really need it? -//! Cast between unrelated pointer types. -/** This method should be used sparingly as a last resort for dealing with - situations that inherently break strict ISO C++ aliasing rules. */ -// T is a pointer type because it will be explicitly provided by the programmer as a template argument; -// U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process. -template<typename T, typename U> -inline T punned_cast( U* ptr ) { - std::uintptr_t x = reinterpret_cast<std::uintptr_t>(ptr); - return reinterpret_cast<T>(x); -} - -template<class T, size_t S, size_t R> -struct padded_base : T { - char pad[S - R]; -}; -template<class T, size_t S> struct padded_base<T, S, 0> : T {}; - -//! Pads type T to fill out to a multiple of cache line size. -template<class T, size_t S = max_nfs_size> -struct padded : padded_base<T, S, sizeof(T) % S> {}; - -#if __TBB_CPP14_INTEGER_SEQUENCE_PRESENT - -using std::index_sequence; -using std::make_index_sequence; - -#else - -template<std::size_t... S> class index_sequence {}; - -template<std::size_t N, std::size_t... S> -struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... > {}; - -template<std::size_t... 
S> -struct make_index_sequence_impl <0, S...> { - using type = index_sequence<S...>; -}; - -template<std::size_t N> -using make_index_sequence = typename make_index_sequence_impl<N>::type; - -#endif /* __TBB_CPP14_INTEGER_SEQUENCE_PRESENT */ - -#if __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT -using std::conjunction; -using std::disjunction; -#else // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT - -template <typename...> -struct conjunction : std::true_type {}; - -template <typename First, typename... Args> -struct conjunction<First, Args...> - : std::conditional<bool(First::value), conjunction<Args...>, First>::type {}; - -template <typename T> -struct conjunction<T> : T {}; - -template <typename...> -struct disjunction : std::false_type {}; - -template <typename First, typename... Args> -struct disjunction<First, Args...> - : std::conditional<bool(First::value), First, disjunction<Args...>>::type {}; - -template <typename T> -struct disjunction<T> : T {}; - -#endif // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT - -template <typename Iterator> -using iterator_value_t = typename std::iterator_traits<Iterator>::value_type; - -template <typename Iterator> -using iterator_key_t = typename std::remove_const<typename iterator_value_t<Iterator>::first_type>::type; - -template <typename Iterator> -using iterator_mapped_t = typename iterator_value_t<Iterator>::second_type; - -template <typename Iterator> -using iterator_alloc_pair_t = std::pair<typename std::add_const<iterator_key_t<Iterator>>::type, - iterator_mapped_t<Iterator>>; - -template <typename A> using alloc_value_type = typename A::value_type; -template <typename A> using alloc_ptr_t = typename std::allocator_traits<A>::pointer; -template <typename A> using has_allocate = decltype(std::declval<alloc_ptr_t<A>&>() = std::declval<A>().allocate(0)); -template <typename A> using has_deallocate = decltype(std::declval<A>().deallocate(std::declval<alloc_ptr_t<A>>(), 0)); - -// alloc_value_type should be checked first, because it can be used in other checks -template <typename T> -using is_allocator = supports<T, alloc_value_type, has_allocate, has_deallocate>; - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template <typename T> -inline constexpr bool is_allocator_v = is_allocator<T>::value; -#endif - -// Template class in which the "type" determines the type of the element number N in pack Args -template <std::size_t N, typename... Args> -struct pack_element { - using type = void; -}; - -template <std::size_t N, typename T, typename... Args> -struct pack_element<N, T, Args...> { - using type = typename pack_element<N-1, Args...>::type; -}; - -template <typename T, typename... Args> -struct pack_element<0, T, Args...> { - using type = T; -}; - -template <std::size_t N, typename... 
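// Illustrative sketch only: what the iterator/allocator trait aliases above compute
// for a std::map iterator and for std::allocator. Only standard types are used.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>

namespace demo {
namespace d0 = tbb::detail::d0;
using map_iter = std::map<std::string, int>::iterator;

static_assert(std::is_same<d0::iterator_key_t<map_iter>, std::string>::value,
              "const stripped from the key type");
static_assert(std::is_same<d0::iterator_mapped_t<map_iter>, int>::value,
              "mapped type");
static_assert(std::is_same<d0::iterator_alloc_pair_t<map_iter>,
                           std::pair<const std::string, int>>::value,
              "value type used for allocation");
static_assert(d0::is_allocator<std::allocator<int>>::value,
              "std::allocator passes the allocate/deallocate expression checks");
} // namespace demo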
Args> -using pack_element_t = typename pack_element<N, Args...>::type; - -template <typename Func> -class raii_guard { -public: - raii_guard( Func f ) : my_func(f), is_active(true) {} - - ~raii_guard() { - if (is_active) { - my_func(); - } - } - - void dismiss() { - is_active = false; - } -private: - Func my_func; - bool is_active; -}; // class raii_guard - -template <typename Func> -raii_guard<Func> make_raii_guard( Func f ) { - return raii_guard<Func>(f); -} - -template <typename Body> -struct try_call_proxy { - try_call_proxy( Body b ) : body(b) {} - - template <typename OnExceptionBody> - void on_exception( OnExceptionBody on_exception_body ) { - auto guard = make_raii_guard(on_exception_body); - body(); - guard.dismiss(); - } - - template <typename OnCompletionBody> - void on_completion(OnCompletionBody on_completion_body) { - auto guard = make_raii_guard(on_completion_body); - body(); - } - - Body body; -}; // struct try_call_proxy - -// Template helper function for API -// try_call(lambda1).on_exception(lambda2) -// Executes lambda1 and if it throws an exception - executes lambda2 -template <typename Body> -try_call_proxy<Body> try_call( Body b ) { - return try_call_proxy<Body>(b); -} - -#if __TBB_CPP17_IS_SWAPPABLE_PRESENT -using std::is_nothrow_swappable; -using std::is_swappable; -#else // __TBB_CPP17_IS_SWAPPABLE_PRESENT -namespace is_swappable_detail { -using std::swap; - -template <typename T> -using has_swap = decltype(swap(std::declval<T&>(), std::declval<T&>())); - -#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER -// Workaround for VS2015: it fails to instantiate noexcept(...) inside std::integral_constant. -template <typename T> -struct noexcept_wrapper { - static const bool value = noexcept(swap(std::declval<T&>(), std::declval<T&>())); -}; -template <typename T> -struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept_wrapper<T>::value> {}; -#else -template <typename T> -struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept(swap(std::declval<T&>(), std::declval<T&>()))> {}; -#endif -} - -template <typename T> -struct is_swappable : supports<T, is_swappable_detail::has_swap> {}; - -template <typename T> -struct is_nothrow_swappable - : conjunction<is_swappable<T>, is_swappable_detail::is_nothrow_swappable_impl<T>> {}; -#endif // __TBB_CPP17_IS_SWAPPABLE_PRESENT - -//! Allows to store a function parameter pack as a variable and later pass it to another function -template< typename... Types > -struct stored_pack; - -template<> -struct stored_pack<> -{ - using pack_type = stored_pack<>; - stored_pack() {} - - // Friend front-end functions - template< typename F, typename Pack > friend void call(F&& f, Pack&& p); - template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); - -protected: - // Ideally, ref-qualified non-static methods would be used, - // but that would greatly reduce the set of compilers where it works. - template< typename Ret, typename F, typename... Preceding > - static Ret call(F&& f, const pack_type& /*pack*/, Preceding&&... params) { - return std::forward<F>(f)(std::forward<Preceding>(params)...); - } - template< typename Ret, typename F, typename... Preceding > - static Ret call(F&& f, pack_type&& /*pack*/, Preceding&&... params) { - return std::forward<F>(f)(std::forward<Preceding>(params)...); - } -}; - -template< typename T, typename... 
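// Illustrative sketch only: the try_call(...).on_exception(...) helper defined above,
// used to run a rollback action when the body throws. copy_with_rollback is invented.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <vector>

namespace demo {
inline void copy_with_rollback(std::vector<int>& dst, const std::vector<int>& src) {
    auto rollback = [&dst] { dst.clear(); };          // runs only if the body throws
    tbb::detail::d0::try_call([&] {
        dst.assign(src.begin(), src.end());           // may throw, e.g. std::bad_alloc
    }).on_exception(rollback);                        // the exception still propagates to the caller
}
} // namespace demo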
Types > -struct stored_pack<T, Types...> : stored_pack<Types...> -{ - using pack_type = stored_pack<T, Types...>; - using pack_remainder = stored_pack<Types...>; - - // Since lifetime of original values is out of control, copies should be made. - // Thus references should be stripped away from the deduced type. - typename std::decay<T>::type leftmost_value; - - // Here rvalue references act in the same way as forwarding references, - // as long as class template parameters were deduced via forwarding references. - stored_pack(T&& t, Types&&... types) - : pack_remainder(std::forward<Types>(types)...), leftmost_value(std::forward<T>(t)) {} - - // Friend front-end functions - template< typename F, typename Pack > friend void call(F&& f, Pack&& p); - template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); - -protected: - template< typename Ret, typename F, typename... Preceding > - static Ret call(F&& f, pack_type& pack, Preceding&&... params) { - return pack_remainder::template call<Ret>( - std::forward<F>(f), static_cast<pack_remainder&>(pack), - std::forward<Preceding>(params)... , pack.leftmost_value - ); - } - - template< typename Ret, typename F, typename... Preceding > - static Ret call(F&& f, pack_type&& pack, Preceding&&... params) { - return pack_remainder::template call<Ret>( - std::forward<F>(f), static_cast<pack_remainder&&>(pack), - std::forward<Preceding>(params)... , std::move(pack.leftmost_value) - ); - } -}; - -//! Calls the given function with arguments taken from a stored_pack -template< typename F, typename Pack > -void call(F&& f, Pack&& p) { - std::decay<Pack>::type::template call<void>(std::forward<F>(f), std::forward<Pack>(p)); -} - -template< typename Ret, typename F, typename Pack > -Ret call_and_return(F&& f, Pack&& p) { - return std::decay<Pack>::type::template call<Ret>(std::forward<F>(f), std::forward<Pack>(p)); -} - -template< typename... Types > -stored_pack<Types...> save_pack(Types&&... types) { - return stored_pack<Types...>(std::forward<Types>(types)...); -} - -// A structure with the value which is equal to Trait::value -// but can be used in the immediate context due to parameter T -template <typename Trait, typename T> -struct dependent_bool : std::integral_constant<bool, bool(Trait::value)> {}; - -template <typename Callable> -struct body_arg_detector; - -template <typename Callable, typename ReturnType, typename Arg> -struct body_arg_detector<ReturnType(Callable::*)(Arg)> { - using arg_type = Arg; -}; - -template <typename Callable, typename ReturnType, typename Arg> -struct body_arg_detector<ReturnType(Callable::*)(Arg) const> { - using arg_type = Arg; -}; - -template <typename Callable> -struct argument_detector; - -template <typename Callable> -struct argument_detector { - using type = typename body_arg_detector<decltype(&Callable::operator())>::arg_type; -}; - -template <typename ReturnType, typename Arg> -struct argument_detector<ReturnType(*)(Arg)> { - using type = Arg; -}; - -// Detects the argument type of callable, works for callable with one argument. 
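// Illustrative sketch only: save_pack() copies a set of arguments into a stored_pack,
// and call() later forwards them, in order, to a callable. print_sum is invented.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <iostream>
#include <utility>

namespace demo {
inline void print_sum(int a, int b) { std::cout << a + b << '\n'; }

inline void deferred_invocation() {
    auto pack = tbb::detail::d0::save_pack(2, 3);        // stored_pack<int, int>, values copied
    tbb::detail::d0::call(print_sum, std::move(pack));   // invokes print_sum(2, 3), printing 5
}
} // namespace demo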
-template <typename Callable> -using argument_type_of = typename argument_detector<typename std::decay<Callable>::type>::type; - -template <typename T> -struct type_identity { - using type = T; -}; - -template <typename T> -using type_identity_t = typename type_identity<T>::type; - -} // inline namespace d0 -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__template_helpers_H - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__template_helpers_H +#define __TBB_detail__template_helpers_H + +#include "_utils.h" +#include "_config.h" + +#include <cstddef> +#include <cstdint> + +#include <type_traits> +#include <memory> +#include <iterator> + +namespace tbb { +namespace detail { +inline namespace d0 { + +// An internal implementation of void_t, which can be used in SFINAE contexts +template <typename...> +struct void_impl { + using type = void; +}; // struct void_impl + +template <typename... Args> +using void_t = typename void_impl<Args...>::type; + +// Generic SFINAE helper for expression checks, based on the idea demonstrated in ISO C++ paper n4502 +template <typename T, typename, template <typename> class... Checks> +struct supports_impl { + using type = std::false_type; +}; + +template <typename T, template <typename> class... Checks> +struct supports_impl<T, void_t<Checks<T>...>, Checks...> { + using type = std::true_type; +}; + +template <typename T, template <typename> class... Checks> +using supports = typename supports_impl<T, void, Checks...>::type; + +//! A template to select either 32-bit or 64-bit constant as compile time, depending on machine word size. +template <unsigned u, unsigned long long ull > +struct select_size_t_constant { + // Explicit cast is needed to avoid compiler warnings about possible truncation. + // The value of the right size, which is selected by ?:, is anyway not truncated or promoted. + static const std::size_t value = (std::size_t)((sizeof(std::size_t)==sizeof(u)) ? u : ull); +}; + +// TODO: do we really need it? +//! Cast between unrelated pointer types. +/** This method should be used sparingly as a last resort for dealing with + situations that inherently break strict ISO C++ aliasing rules. */ +// T is a pointer type because it will be explicitly provided by the programmer as a template argument; +// U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process. +template<typename T, typename U> +inline T punned_cast( U* ptr ) { + std::uintptr_t x = reinterpret_cast<std::uintptr_t>(ptr); + return reinterpret_cast<T>(x); +} + +template<class T, size_t S, size_t R> +struct padded_base : T { + char pad[S - R]; +}; +template<class T, size_t S> struct padded_base<T, S, 0> : T {}; + +//! Pads type T to fill out to a multiple of cache line size. 
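// Illustrative sketch only: argument_type_of<> reports the single parameter type of a
// callable (lambda or function pointer), which TBB uses to deduce body argument types.
// The lambda `body` is invented for this example.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <type_traits>

namespace demo {
auto body = [](const double& x) { return x * 2; };

static_assert(std::is_same<tbb::detail::d0::argument_type_of<decltype(body)>,
                           const double&>::value, "lambda parameter type");
static_assert(std::is_same<tbb::detail::d0::argument_type_of<void(*)(int)>,
                           int>::value, "function pointer parameter type");
} // namespace demo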
+template<class T, size_t S = max_nfs_size> +struct padded : padded_base<T, S, sizeof(T) % S> {}; + +#if __TBB_CPP14_INTEGER_SEQUENCE_PRESENT + +using std::index_sequence; +using std::make_index_sequence; + +#else + +template<std::size_t... S> class index_sequence {}; + +template<std::size_t N, std::size_t... S> +struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... > {}; + +template<std::size_t... S> +struct make_index_sequence_impl <0, S...> { + using type = index_sequence<S...>; +}; + +template<std::size_t N> +using make_index_sequence = typename make_index_sequence_impl<N>::type; + +#endif /* __TBB_CPP14_INTEGER_SEQUENCE_PRESENT */ + +#if __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT +using std::conjunction; +using std::disjunction; +#else // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT + +template <typename...> +struct conjunction : std::true_type {}; + +template <typename First, typename... Args> +struct conjunction<First, Args...> + : std::conditional<bool(First::value), conjunction<Args...>, First>::type {}; + +template <typename T> +struct conjunction<T> : T {}; + +template <typename...> +struct disjunction : std::false_type {}; + +template <typename First, typename... Args> +struct disjunction<First, Args...> + : std::conditional<bool(First::value), First, disjunction<Args...>>::type {}; + +template <typename T> +struct disjunction<T> : T {}; + +#endif // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT + +template <typename Iterator> +using iterator_value_t = typename std::iterator_traits<Iterator>::value_type; + +template <typename Iterator> +using iterator_key_t = typename std::remove_const<typename iterator_value_t<Iterator>::first_type>::type; + +template <typename Iterator> +using iterator_mapped_t = typename iterator_value_t<Iterator>::second_type; + +template <typename Iterator> +using iterator_alloc_pair_t = std::pair<typename std::add_const<iterator_key_t<Iterator>>::type, + iterator_mapped_t<Iterator>>; + +template <typename A> using alloc_value_type = typename A::value_type; +template <typename A> using alloc_ptr_t = typename std::allocator_traits<A>::pointer; +template <typename A> using has_allocate = decltype(std::declval<alloc_ptr_t<A>&>() = std::declval<A>().allocate(0)); +template <typename A> using has_deallocate = decltype(std::declval<A>().deallocate(std::declval<alloc_ptr_t<A>>(), 0)); + +// alloc_value_type should be checked first, because it can be used in other checks +template <typename T> +using is_allocator = supports<T, alloc_value_type, has_allocate, has_deallocate>; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename T> +inline constexpr bool is_allocator_v = is_allocator<T>::value; +#endif + +// Template class in which the "type" determines the type of the element number N in pack Args +template <std::size_t N, typename... Args> +struct pack_element { + using type = void; +}; + +template <std::size_t N, typename T, typename... Args> +struct pack_element<N, T, Args...> { + using type = typename pack_element<N-1, Args...>::type; +}; + +template <typename T, typename... Args> +struct pack_element<0, T, Args...> { + using type = T; +}; + +template <std::size_t N, typename... 
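// Illustrative sketch only: padded<> rounds a type up to a multiple of max_nfs_size (128),
// so adjacent elements land on different cache lines. slot_counter is an invented type.
#include "oneapi/tbb/detail/_template_helpers.h"
#include <atomic>

namespace demo {
struct slot_counter { std::atomic<long> hits{0}; };

static_assert(sizeof(tbb::detail::d0::padded<slot_counter>) % tbb::detail::d0::max_nfs_size == 0,
              "size rounded up to a multiple of the cache-line upper bound");

// e.g. per-slot counters that avoid false sharing:
// tbb::detail::d0::padded<slot_counter> counters[8];
} // namespace demo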
Args> +using pack_element_t = typename pack_element<N, Args...>::type; + +template <typename Func> +class raii_guard { +public: + raii_guard( Func f ) : my_func(f), is_active(true) {} + + ~raii_guard() { + if (is_active) { + my_func(); + } + } + + void dismiss() { + is_active = false; + } +private: + Func my_func; + bool is_active; +}; // class raii_guard + +template <typename Func> +raii_guard<Func> make_raii_guard( Func f ) { + return raii_guard<Func>(f); +} + +template <typename Body> +struct try_call_proxy { + try_call_proxy( Body b ) : body(b) {} + + template <typename OnExceptionBody> + void on_exception( OnExceptionBody on_exception_body ) { + auto guard = make_raii_guard(on_exception_body); + body(); + guard.dismiss(); + } + + template <typename OnCompletionBody> + void on_completion(OnCompletionBody on_completion_body) { + auto guard = make_raii_guard(on_completion_body); + body(); + } + + Body body; +}; // struct try_call_proxy + +// Template helper function for API +// try_call(lambda1).on_exception(lambda2) +// Executes lambda1 and if it throws an exception - executes lambda2 +template <typename Body> +try_call_proxy<Body> try_call( Body b ) { + return try_call_proxy<Body>(b); +} + +#if __TBB_CPP17_IS_SWAPPABLE_PRESENT +using std::is_nothrow_swappable; +using std::is_swappable; +#else // __TBB_CPP17_IS_SWAPPABLE_PRESENT +namespace is_swappable_detail { +using std::swap; + +template <typename T> +using has_swap = decltype(swap(std::declval<T&>(), std::declval<T&>())); + +#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER +// Workaround for VS2015: it fails to instantiate noexcept(...) inside std::integral_constant. +template <typename T> +struct noexcept_wrapper { + static const bool value = noexcept(swap(std::declval<T&>(), std::declval<T&>())); +}; +template <typename T> +struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept_wrapper<T>::value> {}; +#else +template <typename T> +struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept(swap(std::declval<T&>(), std::declval<T&>()))> {}; +#endif +} + +template <typename T> +struct is_swappable : supports<T, is_swappable_detail::has_swap> {}; + +template <typename T> +struct is_nothrow_swappable + : conjunction<is_swappable<T>, is_swappable_detail::is_nothrow_swappable_impl<T>> {}; +#endif // __TBB_CPP17_IS_SWAPPABLE_PRESENT + +//! Allows to store a function parameter pack as a variable and later pass it to another function +template< typename... Types > +struct stored_pack; + +template<> +struct stored_pack<> +{ + using pack_type = stored_pack<>; + stored_pack() {} + + // Friend front-end functions + template< typename F, typename Pack > friend void call(F&& f, Pack&& p); + template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); + +protected: + // Ideally, ref-qualified non-static methods would be used, + // but that would greatly reduce the set of compilers where it works. + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, const pack_type& /*pack*/, Preceding&&... params) { + return std::forward<F>(f)(std::forward<Preceding>(params)...); + } + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type&& /*pack*/, Preceding&&... params) { + return std::forward<F>(f)(std::forward<Preceding>(params)...); + } +}; + +template< typename T, typename... 
Types > +struct stored_pack<T, Types...> : stored_pack<Types...> +{ + using pack_type = stored_pack<T, Types...>; + using pack_remainder = stored_pack<Types...>; + + // Since lifetime of original values is out of control, copies should be made. + // Thus references should be stripped away from the deduced type. + typename std::decay<T>::type leftmost_value; + + // Here rvalue references act in the same way as forwarding references, + // as long as class template parameters were deduced via forwarding references. + stored_pack(T&& t, Types&&... types) + : pack_remainder(std::forward<Types>(types)...), leftmost_value(std::forward<T>(t)) {} + + // Friend front-end functions + template< typename F, typename Pack > friend void call(F&& f, Pack&& p); + template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); + +protected: + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type& pack, Preceding&&... params) { + return pack_remainder::template call<Ret>( + std::forward<F>(f), static_cast<pack_remainder&>(pack), + std::forward<Preceding>(params)... , pack.leftmost_value + ); + } + + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type&& pack, Preceding&&... params) { + return pack_remainder::template call<Ret>( + std::forward<F>(f), static_cast<pack_remainder&&>(pack), + std::forward<Preceding>(params)... , std::move(pack.leftmost_value) + ); + } +}; + +//! Calls the given function with arguments taken from a stored_pack +template< typename F, typename Pack > +void call(F&& f, Pack&& p) { + std::decay<Pack>::type::template call<void>(std::forward<F>(f), std::forward<Pack>(p)); +} + +template< typename Ret, typename F, typename Pack > +Ret call_and_return(F&& f, Pack&& p) { + return std::decay<Pack>::type::template call<Ret>(std::forward<F>(f), std::forward<Pack>(p)); +} + +template< typename... Types > +stored_pack<Types...> save_pack(Types&&... types) { + return stored_pack<Types...>(std::forward<Types>(types)...); +} + +// A structure with the value which is equal to Trait::value +// but can be used in the immediate context due to parameter T +template <typename Trait, typename T> +struct dependent_bool : std::integral_constant<bool, bool(Trait::value)> {}; + +template <typename Callable> +struct body_arg_detector; + +template <typename Callable, typename ReturnType, typename Arg> +struct body_arg_detector<ReturnType(Callable::*)(Arg)> { + using arg_type = Arg; +}; + +template <typename Callable, typename ReturnType, typename Arg> +struct body_arg_detector<ReturnType(Callable::*)(Arg) const> { + using arg_type = Arg; +}; + +template <typename Callable> +struct argument_detector; + +template <typename Callable> +struct argument_detector { + using type = typename body_arg_detector<decltype(&Callable::operator())>::arg_type; +}; + +template <typename ReturnType, typename Arg> +struct argument_detector<ReturnType(*)(Arg)> { + using type = Arg; +}; + +// Detects the argument type of callable, works for callable with one argument. 
+template <typename Callable> +using argument_type_of = typename argument_detector<typename std::decay<Callable>::type>::type; + +template <typename T> +struct type_identity { + using type = T; +}; + +template <typename T> +using type_identity_t = typename type_identity<T>::type; + +} // inline namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__template_helpers_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h index d1e02179f8..984bedd70d 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h @@ -1,329 +1,329 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_detail__utils_H -#define __TBB_detail__utils_H - -#include <type_traits> -#include <cstdint> -#include <atomic> - -#include "_config.h" -#include "_assert.h" -#include "_machine.h" - -namespace tbb { -namespace detail { -inline namespace d0 { - -//! Utility template function to prevent "unused" warnings by various compilers. -template<typename... T> void suppress_unused_warning(T&&...) {} - -//! Compile-time constant that is upper bound on cache line/sector size. -/** It should be used only in situations where having a compile-time upper - bound is more useful than a run-time exact answer. - @ingroup memory_allocation */ -constexpr size_t max_nfs_size = 128; - -//! Class that implements exponential backoff. -class atomic_backoff { - //! Time delay, in units of "pause" instructions. - /** Should be equal to approximately the number of "pause" instructions - that take the same time as an context switch. Must be a power of two.*/ - static constexpr std::int32_t LOOPS_BEFORE_YIELD = 16; - std::int32_t count; - -public: - // In many cases, an object of this type is initialized eagerly on hot path, - // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ } - // For this reason, the construction cost must be very small! - atomic_backoff() : count(1) {} - // This constructor pauses immediately; do not use on hot paths! - atomic_backoff(bool) : count(1) { pause(); } - - //! No Copy - atomic_backoff(const atomic_backoff&) = delete; - atomic_backoff& operator=(const atomic_backoff&) = delete; - - //! Pause for a while. - void pause() { - if (count <= LOOPS_BEFORE_YIELD) { - machine_pause(count); - // Pause twice as long the next time. - count *= 2; - } else { - // Pause is so long that we might as well yield CPU to scheduler. - yield(); - } - } - - //! Pause for a few times and return false if saturated. - bool bounded_pause() { - machine_pause(count); - if (count < LOOPS_BEFORE_YIELD) { - // Pause twice as long the next time. - count *= 2; - return true; - } else { - return false; - } - } - - void reset() { - count = 1; - } -}; - -//! Spin WHILE the condition is true. -/** T and U should be comparable types. 
*/ -template <typename T, typename C> -void spin_wait_while_condition(const std::atomic<T>& location, C comp) { - atomic_backoff backoff; - while (comp(location.load(std::memory_order_acquire))) { - backoff.pause(); - } -} - -//! Spin WHILE the value of the variable is equal to a given value -/** T and U should be comparable types. */ -template <typename T, typename U> -void spin_wait_while_eq(const std::atomic<T>& location, const U value) { - spin_wait_while_condition(location, [&value](T t) { return t == value; }); -} - -//! Spin UNTIL the value of the variable is equal to a given value -/** T and U should be comparable types. */ -template<typename T, typename U> -void spin_wait_until_eq(const std::atomic<T>& location, const U value) { - spin_wait_while_condition(location, [&value](T t) { return t != value; }); -} - -template <typename T> -std::uintptr_t log2(T in) { - __TBB_ASSERT(in > 0, "The logarithm of a non-positive value is undefined."); - return machine_log2(in); -} - -template<typename T> -T reverse_bits(T src) { - return machine_reverse_bits(src); -} - -template<typename T> -T reverse_n_bits(T src, std::size_t n) { - __TBB_ASSERT(n != 0, "Reverse for 0 bits is undefined behavior."); - return reverse_bits(src) >> (number_of_bits<T>() - n); -} - -// A function to check if passed integer is a power of two -template <typename IntegerType> -constexpr bool is_power_of_two( IntegerType arg ) { - static_assert(std::is_integral<IntegerType>::value, - "An argument for is_power_of_two should be integral type"); - return arg && (0 == (arg & (arg - 1))); -} - -// A function to determine if passed integer is a power of two -// at least as big as another power of two, i.e. for strictly positive i and j, -// with j being a power of two, determines whether i==j<<k for some nonnegative k -template <typename ArgIntegerType, typename DivisorIntegerType> -constexpr bool is_power_of_two_at_least(ArgIntegerType arg, DivisorIntegerType divisor) { - // Divisor should be a power of two - static_assert(std::is_integral<ArgIntegerType>::value, - "An argument for is_power_of_two_at_least should be integral type"); - return 0 == (arg & (arg - divisor)); -} - -// A function to compute arg modulo divisor where divisor is a power of 2. -template<typename ArgIntegerType, typename DivisorIntegerType> -inline ArgIntegerType modulo_power_of_two(ArgIntegerType arg, DivisorIntegerType divisor) { - __TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" ); - return arg & (divisor - 1); -} - -//! A function to check if passed in pointer is aligned on a specific border -template<typename T> -constexpr bool is_aligned(T* pointer, std::uintptr_t alignment) { - return 0 == ((std::uintptr_t)pointer & (alignment - 1)); -} - -#if TBB_USE_ASSERT -static void* const poisoned_ptr = reinterpret_cast<void*>(-1); - -//! Set p to invalid pointer value. -template<typename T> -inline void poison_pointer( T* &p ) { p = reinterpret_cast<T*>(poisoned_ptr); } - -template<typename T> -inline void poison_pointer(std::atomic<T*>& p) { p.store(reinterpret_cast<T*>(poisoned_ptr), std::memory_order_relaxed); } - -/** Expected to be used in assertions only, thus no empty form is defined. 
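// Illustrative sketch only: the spin-wait helpers above pause with exponential backoff
// (and eventually yield) until a condition holds. The flag and the thread are invented.
#include "oneapi/tbb/detail/_utils.h"
#include <atomic>
#include <thread>

namespace demo {
inline void spin_wait_demo() {
    std::atomic<int> ready{0};
    std::thread producer([&ready] { ready.store(1, std::memory_order_release); });
    tbb::detail::d0::spin_wait_until_eq(ready, 1);   // machine_pause, doubling, then yield()
    producer.join();
}
} // namespace demo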
**/ -template<typename T> -inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); } - -template<typename T> -inline bool is_poisoned(const std::atomic<T*>& p) { return is_poisoned(p.load(std::memory_order_relaxed)); } -#else -template<typename T> -inline void poison_pointer(T* &) {/*do nothing*/} - -template<typename T> -inline void poison_pointer(std::atomic<T*>&) { /* do nothing */} -#endif /* !TBB_USE_ASSERT */ - -template <std::size_t alignment = 0, typename T> -bool assert_pointer_valid(T* p, const char* comment = nullptr) { - suppress_unused_warning(p, comment); - __TBB_ASSERT(p != nullptr, comment); - __TBB_ASSERT(!is_poisoned(p), comment); -#if !(_MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER) - __TBB_ASSERT(is_aligned(p, alignment == 0 ? alignof(T) : alignment), comment); -#endif - // Returns something to simplify assert_pointers_valid implementation. - return true; -} - -template <typename... Args> -void assert_pointers_valid(Args*... p) { - // suppress_unused_warning is used as an evaluation context for the variadic pack. - suppress_unused_warning(assert_pointer_valid(p)...); -} - -//! Base class for types that should not be assigned. -class no_assign { -public: - void operator=(const no_assign&) = delete; - no_assign(const no_assign&) = default; - no_assign() = default; -}; - -//! Base class for types that should not be copied or assigned. -class no_copy: no_assign { -public: - no_copy(const no_copy&) = delete; - no_copy() = default; -}; - -template <typename T> -void swap_atomics_relaxed(std::atomic<T>& lhs, std::atomic<T>& rhs){ - T tmp = lhs.load(std::memory_order_relaxed); - lhs.store(rhs.load(std::memory_order_relaxed), std::memory_order_relaxed); - rhs.store(tmp, std::memory_order_relaxed); -} - -//! One-time initialization states -enum class do_once_state { - uninitialized = 0, ///< No execution attempts have been undertaken yet - pending, ///< A thread is executing associated do-once routine - executed, ///< Do-once routine has been executed - initialized = executed ///< Convenience alias -}; - -//! One-time initialization function -/** /param initializer Pointer to function without arguments - The variant that returns bool is used for cases when initialization can fail - and it is OK to continue execution, but the state should be reset so that - the initialization attempt was repeated the next time. - /param state Shared state associated with initializer that specifies its - initialization state. Must be initially set to #uninitialized value - (e.g. by means of default static zero initialization). **/ -template <typename F> -void atomic_do_once( const F& initializer, std::atomic<do_once_state>& state ) { - // The loop in the implementation is necessary to avoid race when thread T2 - // that arrived in the middle of initialization attempt by another thread T1 - // has just made initialization possible. - // In such a case T2 has to rely on T1 to initialize, but T1 may already be past - // the point where it can recognize the changed conditions. 
- do_once_state expected_state; - while ( state.load( std::memory_order_acquire ) != do_once_state::executed ) { - if( state.load( std::memory_order_relaxed ) == do_once_state::uninitialized ) { - expected_state = do_once_state::uninitialized; -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 - using enum_type = typename std::underlying_type<do_once_state>::type; - if( ((std::atomic<enum_type>&)state).compare_exchange_strong( (enum_type&)expected_state, (enum_type)do_once_state::pending ) ) { -#else - if( state.compare_exchange_strong( expected_state, do_once_state::pending ) ) { -#endif - run_initializer( initializer, state ); - break; - } - } - spin_wait_while_eq( state, do_once_state::pending ); - } -} - -// Run the initializer which can not fail -template<typename Functor> -void run_initializer(const Functor& f, std::atomic<do_once_state>& state ) { - f(); - state.store(do_once_state::executed, std::memory_order_release); -} - -#if __TBB_CPP20_CONCEPTS_PRESENT -template <typename T> -concept boolean_testable_impl = std::convertible_to<T, bool>; - -template <typename T> -concept boolean_testable = boolean_testable_impl<T> && requires( T&& t ) { - { !std::forward<T>(t) } -> boolean_testable_impl; - }; - -#if __TBB_CPP20_COMPARISONS_PRESENT -struct synthesized_three_way_comparator { - template <typename T1, typename T2> - auto operator()( const T1& lhs, const T2& rhs ) const - requires requires { - { lhs < rhs } -> boolean_testable; - { rhs < lhs } -> boolean_testable; - } - { - if constexpr (std::three_way_comparable_with<T1, T2>) { - return lhs <=> rhs; - } else { - if (lhs < rhs) { - return std::weak_ordering::less; - } - if (rhs < lhs) { - return std::weak_ordering::greater; - } - return std::weak_ordering::equivalent; - } - } -}; // struct synthesized_three_way_comparator - -template <typename T1, typename T2 = T1> -using synthesized_three_way_result = decltype(synthesized_three_way_comparator{}(std::declval<T1&>(), - std::declval<T2&>())); - -#endif // __TBB_CPP20_COMPARISONS_PRESENT -#endif // __TBB_CPP20_CONCEPTS_PRESENT - -} // namespace d0 - -namespace d1 { - -class delegate_base { -public: - virtual bool operator()() const = 0; - virtual ~delegate_base() {} -}; // class delegate_base - -} // namespace d1 - -} // namespace detail -} // namespace tbb - -#endif // __TBB_detail__utils_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__utils_H +#define __TBB_detail__utils_H + +#include <type_traits> +#include <cstdint> +#include <atomic> + +#include "_config.h" +#include "_assert.h" +#include "_machine.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Utility template function to prevent "unused" warnings by various compilers. +template<typename... T> void suppress_unused_warning(T&&...) {} + +//! Compile-time constant that is upper bound on cache line/sector size. +/** It should be used only in situations where having a compile-time upper + bound is more useful than a run-time exact answer. 
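// Illustrative sketch only: atomic_do_once() lets exactly one thread run the initializer
// while others spin until the shared state becomes `executed`. init_tables and
// table_state are invented names.
#include "oneapi/tbb/detail/_utils.h"
#include <atomic>

namespace demo {
inline void init_tables() { /* one-time setup would go here */ }

// Zero-initialized, i.e. do_once_state::uninitialized.
std::atomic<tbb::detail::d0::do_once_state> table_state{};

inline void ensure_tables() {
    tbb::detail::d0::atomic_do_once([] { init_tables(); }, table_state);
}
} // namespace demo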
+ @ingroup memory_allocation */ +constexpr size_t max_nfs_size = 128; + +//! Class that implements exponential backoff. +class atomic_backoff { + //! Time delay, in units of "pause" instructions. + /** Should be equal to approximately the number of "pause" instructions + that take the same time as an context switch. Must be a power of two.*/ + static constexpr std::int32_t LOOPS_BEFORE_YIELD = 16; + std::int32_t count; + +public: + // In many cases, an object of this type is initialized eagerly on hot path, + // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ } + // For this reason, the construction cost must be very small! + atomic_backoff() : count(1) {} + // This constructor pauses immediately; do not use on hot paths! + atomic_backoff(bool) : count(1) { pause(); } + + //! No Copy + atomic_backoff(const atomic_backoff&) = delete; + atomic_backoff& operator=(const atomic_backoff&) = delete; + + //! Pause for a while. + void pause() { + if (count <= LOOPS_BEFORE_YIELD) { + machine_pause(count); + // Pause twice as long the next time. + count *= 2; + } else { + // Pause is so long that we might as well yield CPU to scheduler. + yield(); + } + } + + //! Pause for a few times and return false if saturated. + bool bounded_pause() { + machine_pause(count); + if (count < LOOPS_BEFORE_YIELD) { + // Pause twice as long the next time. + count *= 2; + return true; + } else { + return false; + } + } + + void reset() { + count = 1; + } +}; + +//! Spin WHILE the condition is true. +/** T and U should be comparable types. */ +template <typename T, typename C> +void spin_wait_while_condition(const std::atomic<T>& location, C comp) { + atomic_backoff backoff; + while (comp(location.load(std::memory_order_acquire))) { + backoff.pause(); + } +} + +//! Spin WHILE the value of the variable is equal to a given value +/** T and U should be comparable types. */ +template <typename T, typename U> +void spin_wait_while_eq(const std::atomic<T>& location, const U value) { + spin_wait_while_condition(location, [&value](T t) { return t == value; }); +} + +//! Spin UNTIL the value of the variable is equal to a given value +/** T and U should be comparable types. */ +template<typename T, typename U> +void spin_wait_until_eq(const std::atomic<T>& location, const U value) { + spin_wait_while_condition(location, [&value](T t) { return t != value; }); +} + +template <typename T> +std::uintptr_t log2(T in) { + __TBB_ASSERT(in > 0, "The logarithm of a non-positive value is undefined."); + return machine_log2(in); +} + +template<typename T> +T reverse_bits(T src) { + return machine_reverse_bits(src); +} + +template<typename T> +T reverse_n_bits(T src, std::size_t n) { + __TBB_ASSERT(n != 0, "Reverse for 0 bits is undefined behavior."); + return reverse_bits(src) >> (number_of_bits<T>() - n); +} + +// A function to check if passed integer is a power of two +template <typename IntegerType> +constexpr bool is_power_of_two( IntegerType arg ) { + static_assert(std::is_integral<IntegerType>::value, + "An argument for is_power_of_two should be integral type"); + return arg && (0 == (arg & (arg - 1))); +} + +// A function to determine if passed integer is a power of two +// at least as big as another power of two, i.e. 
for strictly positive i and j, +// with j being a power of two, determines whether i==j<<k for some nonnegative k +template <typename ArgIntegerType, typename DivisorIntegerType> +constexpr bool is_power_of_two_at_least(ArgIntegerType arg, DivisorIntegerType divisor) { + // Divisor should be a power of two + static_assert(std::is_integral<ArgIntegerType>::value, + "An argument for is_power_of_two_at_least should be integral type"); + return 0 == (arg & (arg - divisor)); +} + +// A function to compute arg modulo divisor where divisor is a power of 2. +template<typename ArgIntegerType, typename DivisorIntegerType> +inline ArgIntegerType modulo_power_of_two(ArgIntegerType arg, DivisorIntegerType divisor) { + __TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" ); + return arg & (divisor - 1); +} + +//! A function to check if passed in pointer is aligned on a specific border +template<typename T> +constexpr bool is_aligned(T* pointer, std::uintptr_t alignment) { + return 0 == ((std::uintptr_t)pointer & (alignment - 1)); +} + +#if TBB_USE_ASSERT +static void* const poisoned_ptr = reinterpret_cast<void*>(-1); + +//! Set p to invalid pointer value. +template<typename T> +inline void poison_pointer( T* &p ) { p = reinterpret_cast<T*>(poisoned_ptr); } + +template<typename T> +inline void poison_pointer(std::atomic<T*>& p) { p.store(reinterpret_cast<T*>(poisoned_ptr), std::memory_order_relaxed); } + +/** Expected to be used in assertions only, thus no empty form is defined. **/ +template<typename T> +inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); } + +template<typename T> +inline bool is_poisoned(const std::atomic<T*>& p) { return is_poisoned(p.load(std::memory_order_relaxed)); } +#else +template<typename T> +inline void poison_pointer(T* &) {/*do nothing*/} + +template<typename T> +inline void poison_pointer(std::atomic<T*>&) { /* do nothing */} +#endif /* !TBB_USE_ASSERT */ + +template <std::size_t alignment = 0, typename T> +bool assert_pointer_valid(T* p, const char* comment = nullptr) { + suppress_unused_warning(p, comment); + __TBB_ASSERT(p != nullptr, comment); + __TBB_ASSERT(!is_poisoned(p), comment); +#if !(_MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER) + __TBB_ASSERT(is_aligned(p, alignment == 0 ? alignof(T) : alignment), comment); +#endif + // Returns something to simplify assert_pointers_valid implementation. + return true; +} + +template <typename... Args> +void assert_pointers_valid(Args*... p) { + // suppress_unused_warning is used as an evaluation context for the variadic pack. + suppress_unused_warning(assert_pointer_valid(p)...); +} + +//! Base class for types that should not be assigned. +class no_assign { +public: + void operator=(const no_assign&) = delete; + no_assign(const no_assign&) = default; + no_assign() = default; +}; + +//! Base class for types that should not be copied or assigned. +class no_copy: no_assign { +public: + no_copy(const no_copy&) = delete; + no_copy() = default; +}; + +template <typename T> +void swap_atomics_relaxed(std::atomic<T>& lhs, std::atomic<T>& rhs){ + T tmp = lhs.load(std::memory_order_relaxed); + lhs.store(rhs.load(std::memory_order_relaxed), std::memory_order_relaxed); + rhs.store(tmp, std::memory_order_relaxed); +} + +//! 
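// Illustrative sketch only: a numeric check of the power-of-two helpers above.
#include "oneapi/tbb/detail/_utils.h"
#include <cassert>

namespace demo {
static_assert(tbb::detail::d0::is_power_of_two(64u), "64 == 2^6");
static_assert(!tbb::detail::d0::is_power_of_two(48u), "48 = 16*3 is not a power of two");

inline void modulo_demo() {
    // modulo_power_of_two(arg, d) computes arg & (d - 1):
    // 37 = 0b100101, 8 - 1 = 0b000111, so 37 mod 8 == 0b101 == 5.
    assert(tbb::detail::d0::modulo_power_of_two(37u, 8u) == 5u);
}
} // namespace demo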
One-time initialization states +enum class do_once_state { + uninitialized = 0, ///< No execution attempts have been undertaken yet + pending, ///< A thread is executing associated do-once routine + executed, ///< Do-once routine has been executed + initialized = executed ///< Convenience alias +}; + +//! One-time initialization function +/** /param initializer Pointer to function without arguments + The variant that returns bool is used for cases when initialization can fail + and it is OK to continue execution, but the state should be reset so that + the initialization attempt was repeated the next time. + /param state Shared state associated with initializer that specifies its + initialization state. Must be initially set to #uninitialized value + (e.g. by means of default static zero initialization). **/ +template <typename F> +void atomic_do_once( const F& initializer, std::atomic<do_once_state>& state ) { + // The loop in the implementation is necessary to avoid race when thread T2 + // that arrived in the middle of initialization attempt by another thread T1 + // has just made initialization possible. + // In such a case T2 has to rely on T1 to initialize, but T1 may already be past + // the point where it can recognize the changed conditions. + do_once_state expected_state; + while ( state.load( std::memory_order_acquire ) != do_once_state::executed ) { + if( state.load( std::memory_order_relaxed ) == do_once_state::uninitialized ) { + expected_state = do_once_state::uninitialized; +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + using enum_type = typename std::underlying_type<do_once_state>::type; + if( ((std::atomic<enum_type>&)state).compare_exchange_strong( (enum_type&)expected_state, (enum_type)do_once_state::pending ) ) { +#else + if( state.compare_exchange_strong( expected_state, do_once_state::pending ) ) { +#endif + run_initializer( initializer, state ); + break; + } + } + spin_wait_while_eq( state, do_once_state::pending ); + } +} + +// Run the initializer which can not fail +template<typename Functor> +void run_initializer(const Functor& f, std::atomic<do_once_state>& state ) { + f(); + state.store(do_once_state::executed, std::memory_order_release); +} + +#if __TBB_CPP20_CONCEPTS_PRESENT +template <typename T> +concept boolean_testable_impl = std::convertible_to<T, bool>; + +template <typename T> +concept boolean_testable = boolean_testable_impl<T> && requires( T&& t ) { + { !std::forward<T>(t) } -> boolean_testable_impl; + }; + +#if __TBB_CPP20_COMPARISONS_PRESENT +struct synthesized_three_way_comparator { + template <typename T1, typename T2> + auto operator()( const T1& lhs, const T2& rhs ) const + requires requires { + { lhs < rhs } -> boolean_testable; + { rhs < lhs } -> boolean_testable; + } + { + if constexpr (std::three_way_comparable_with<T1, T2>) { + return lhs <=> rhs; + } else { + if (lhs < rhs) { + return std::weak_ordering::less; + } + if (rhs < lhs) { + return std::weak_ordering::greater; + } + return std::weak_ordering::equivalent; + } + } +}; // struct synthesized_three_way_comparator + +template <typename T1, typename T2 = T1> +using synthesized_three_way_result = decltype(synthesized_three_way_comparator{}(std::declval<T1&>(), + std::declval<T2&>())); + +#endif // __TBB_CPP20_COMPARISONS_PRESENT +#endif // __TBB_CPP20_CONCEPTS_PRESENT + +} // namespace d0 + +namespace d1 { + +class delegate_base { +public: + virtual bool operator()() const = 0; + virtual ~delegate_base() {} +}; // class delegate_base + +} // namespace d1 + +} // namespace 
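// Illustrative sketch only (requires the C++20 comparison support guarded above): for a
// type that defines only operator<, synthesized_three_way_comparator falls back to two
// '<' comparisons and yields std::weak_ordering. legacy_key is an invented type.
#include "oneapi/tbb/detail/_utils.h"
#include <compare>

namespace demo {
struct legacy_key {
    int v;
    friend bool operator<(const legacy_key& a, const legacy_key& b) { return a.v < b.v; }
};

inline std::weak_ordering compare_keys(const legacy_key& a, const legacy_key& b) {
    return tbb::detail::d0::synthesized_three_way_comparator{}(a, b);
}
} // namespace demo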
detail +} // namespace tbb + +#endif // __TBB_detail__utils_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h b/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h index 246447a213..70c1f98336 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h +++ b/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h @@ -1,1113 +1,1113 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_enumerable_thread_specific_H -#define __TBB_enumerable_thread_specific_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_assert.h" -#include "detail/_template_helpers.h" -#include "detail/_aligned_space.h" - -#include "concurrent_vector.h" -#include "tbb_allocator.h" -#include "cache_aligned_allocator.h" -#include "profiling.h" - -#include <atomic> -#include <thread> -#include <cstring> // memcpy -#include <cstddef> // std::ptrdiff_t - -#include "task.h" // for task::suspend_point - -#if _WIN32 || _WIN64 -#include <windows.h> -#else -#include <pthread.h> -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -//! enum for selecting between single key and key-per-instance versions -enum ets_key_usage_type { - ets_key_per_instance - , ets_no_key -#if __TBB_RESUMABLE_TASKS - , ets_suspend_aware -#endif -}; - -// Forward declaration to use in internal classes -template <typename T, typename Allocator, ets_key_usage_type ETS_key_type> -class enumerable_thread_specific; - -template <std::size_t ThreadIDSize> -struct internal_ets_key_selector { - using key_type = std::thread::id; - static key_type current_key() { - return std::this_thread::get_id(); - } -}; - -// Intel Compiler on OSX cannot create atomics objects that instantiated from non-fundamental types -#if __INTEL_COMPILER && __APPLE__ -template<> -struct internal_ets_key_selector<sizeof(std::size_t)> { - using key_type = std::size_t; - static key_type current_key() { - auto id = std::this_thread::get_id(); - return reinterpret_cast<key_type&>(id); - } -}; -#endif - -template <ets_key_usage_type ETS_key_type> -struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {}; - -#if __TBB_RESUMABLE_TASKS -template <> -struct ets_key_selector<ets_suspend_aware> { - using key_type = suspend_point; - static key_type current_key() { - return r1::current_suspend_point(); - } -}; -#endif - -template<ets_key_usage_type ETS_key_type> -class ets_base : detail::no_copy { -protected: - using key_type = typename ets_key_selector<ETS_key_type>::key_type; - -public: - struct slot; - struct array { - array* next; - std::size_t lg_size; - slot& at( std::size_t k ) { - return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k]; - } - std::size_t size() const { return std::size_t(1) << lg_size; } - std::size_t mask() const { return size() - 1; } - std::size_t start( std::size_t h ) const { - return h >> (8 * sizeof(std::size_t) - lg_size); - } - }; - struct slot { - 
std::atomic<key_type> key; - void* ptr; - bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); } - bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; } - bool claim( key_type k ) { - // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size - key_type expected = key_type(); - return key.compare_exchange_strong(expected, k); - } - }; - -protected: - //! Root of linked list of arrays of decreasing size. - /** nullptr if and only if my_count==0. - Each array in the list is half the size of its predecessor. */ - std::atomic<array*> my_root; - std::atomic<std::size_t> my_count; - - virtual void* create_local() = 0; - virtual void* create_array(std::size_t _size) = 0; // _size in bytes - virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes - - array* allocate( std::size_t lg_size ) { - std::size_t n = std::size_t(1) << lg_size; - array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot))); - a->lg_size = lg_size; - std::memset( a + 1, 0, n * sizeof(slot) ); - return a; - } - void free(array* a) { - std::size_t n = std::size_t(1) << (a->lg_size); - free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) ); - } - - ets_base() : my_root{nullptr}, my_count{0} {} - virtual ~ets_base(); // g++ complains if this is not virtual - - void* table_lookup( bool& exists ); - void table_clear(); - // The following functions are not used in concurrent context, - // so we don't need synchronization and ITT annotations there. - template <ets_key_usage_type E2> - void table_elementwise_copy( const ets_base& other, - void*(*add_element)(ets_base<E2>&, void*) ) { - __TBB_ASSERT(!my_root.load(std::memory_order_relaxed),NULL); - __TBB_ASSERT(!my_count.load(std::memory_order_relaxed),NULL); - if( !other.my_root.load(std::memory_order_relaxed) ) return; - array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size); - my_root.store(root, std::memory_order_relaxed); - root->next = nullptr; - my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed); - std::size_t mask = root->mask(); - for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) { - for( std::size_t i = 0; i < r->size(); ++i ) { - slot& s1 = r->at(i); - if( !s1.empty() ) { - for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) { - slot& s2 = root->at(j); - if( s2.empty() ) { - s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr); - s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed); - break; - } - else if( s2.match(s1.key.load(std::memory_order_relaxed)) ) - break; - } - } - } - } - } - void table_swap( ets_base& other ) { - __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); - swap_atomics_relaxed(my_root, other.my_root); - swap_atomics_relaxed(my_count, other.my_count); - } -}; - -template<ets_key_usage_type ETS_key_type> -ets_base<ETS_key_type>::~ets_base() { - __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr); -} - -template<ets_key_usage_type ETS_key_type> -void ets_base<ETS_key_type>::table_clear() { - while ( array* r = my_root.load(std::memory_order_relaxed) ) { - my_root.store(r->next, std::memory_order_relaxed); - free(r); - } - my_count.store(0, std::memory_order_relaxed); -} - -template<ets_key_usage_type ETS_key_type> -void* ets_base<ETS_key_type>::table_lookup( bool& exists ) { - const key_type k 
= ets_key_selector<ETS_key_type>::current_key(); - - __TBB_ASSERT(k != key_type(),NULL); - void* found; - std::size_t h = std::hash<key_type>{}(k); - for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) { - call_itt_notify(acquired,r); - std::size_t mask=r->mask(); - for(std::size_t i = r->start(h); ;i=(i+1)&mask) { - slot& s = r->at(i); - if( s.empty() ) break; - if( s.match(k) ) { - if( r == my_root.load(std::memory_order_acquire) ) { - // Success at top level - exists = true; - return s.ptr; - } else { - // Success at some other level. Need to insert at top level. - exists = true; - found = s.ptr; - goto insert; - } - } - } - } - // Key does not yet exist. The density of slots in the table does not exceed 0.5, - // for if this will occur a new table is allocated with double the current table - // size, which is swapped in as the new root table. So an empty slot is guaranteed. - exists = false; - found = create_local(); - { - std::size_t c = ++my_count; - array* r = my_root.load(std::memory_order_acquire); - call_itt_notify(acquired,r); - if( !r || c > r->size()/2 ) { - std::size_t s = r ? r->lg_size : 2; - while( c > std::size_t(1)<<(s-1) ) ++s; - array* a = allocate(s); - for(;;) { - a->next = r; - call_itt_notify(releasing,a); - array* new_r = r; - if( my_root.compare_exchange_strong(new_r, a) ) break; - call_itt_notify(acquired, new_r); - __TBB_ASSERT(new_r != nullptr, nullptr); - if( new_r->lg_size >= s ) { - // Another thread inserted an equal or bigger array, so our array is superfluous. - free(a); - break; - } - r = new_r; - } - } - } - insert: - // Whether a slot has been found in an older table, or if it has been inserted at this level, - // it has already been accounted for in the total. Guaranteed to be room for it, and it is - // not present, so search for empty slot and use it. - array* ir = my_root.load(std::memory_order_acquire); - call_itt_notify(acquired, ir); - std::size_t mask = ir->mask(); - for(std::size_t i = ir->start(h);; i = (i+1)&mask) { - slot& s = ir->at(i); - if( s.empty() ) { - if( s.claim(k) ) { - s.ptr = found; - return found; - } - } - } -} - -//! 
Specialization that exploits native TLS -template <> -class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> { - using super = ets_base<ets_no_key>; -#if _WIN32||_WIN64 -#if __TBB_WIN8UI_SUPPORT - using tls_key_t = DWORD; - void create_key() { my_key = FlsAlloc(NULL); } - void destroy_key() { FlsFree(my_key); } - void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); } - void* get_tls() { return (void *)FlsGetValue(my_key); } -#else - using tls_key_t = DWORD; - void create_key() { my_key = TlsAlloc(); } - void destroy_key() { TlsFree(my_key); } - void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); } - void* get_tls() { return (void *)TlsGetValue(my_key); } -#endif -#else - using tls_key_t = pthread_key_t; - void create_key() { pthread_key_create(&my_key, NULL); } - void destroy_key() { pthread_key_delete(my_key); } - void set_tls( void * value ) const { pthread_setspecific(my_key, value); } - void* get_tls() const { return pthread_getspecific(my_key); } -#endif - tls_key_t my_key; - virtual void* create_local() override = 0; - virtual void* create_array(std::size_t _size) override = 0; // _size in bytes - virtual void free_array(void* ptr, std::size_t _size) override = 0; // size in bytes -protected: - ets_base() {create_key();} - ~ets_base() {destroy_key();} - void* table_lookup( bool& exists ) { - void* found = get_tls(); - if( found ) { - exists=true; - } else { - found = super::table_lookup(exists); - set_tls(found); - } - return found; - } - void table_clear() { - destroy_key(); - create_key(); - super::table_clear(); - } - void table_swap( ets_base& other ) { - using std::swap; - __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); - swap(my_key, other.my_key); - super::table_swap(other); - } -}; - -//! Random access iterator for traversing the thread local copies. -template< typename Container, typename Value > -class enumerable_thread_specific_iterator -{ - //! current position in the concurrent_vector - - Container *my_container; - typename Container::size_type my_index; - mutable Value *my_value; - - template<typename C, typename T, typename U> - friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i, - const enumerable_thread_specific_iterator<C, U>& j ); - - template<typename C, typename T, typename U> - friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i, - const enumerable_thread_specific_iterator<C,U>& j ); - - template<typename C, typename T, typename U> - friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i, - const enumerable_thread_specific_iterator<C,U>& j ); - - template<typename C, typename U> - friend class enumerable_thread_specific_iterator; - -public: - //! STL support - using difference_type = std::ptrdiff_t; - using value_type = Value; - using pointer = Value*; - using reference = Value&; - using iterator_category = std::random_access_iterator_tag; - - enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : - my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {} - - //! 
Default constructor - enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {} - - template<typename U> - enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) : - my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {} - - enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const { - return enumerable_thread_specific_iterator(*my_container, my_index + offset); - } - - friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) { - return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset); - } - - enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) { - my_index += offset; - my_value = nullptr; - return *this; - } - - enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const { - return enumerable_thread_specific_iterator( *my_container, my_index-offset ); - } - - enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) { - my_index -= offset; - my_value = nullptr; - return *this; - } - - Value& operator*() const { - Value* value = my_value; - if( !value ) { - value = my_value = (*my_container)[my_index].value(); - } - __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" ); - return *value; - } - - Value& operator[]( std::ptrdiff_t k ) const { - return *(*my_container)[my_index + k].value(); - } - - Value* operator->() const {return &operator*();} - - enumerable_thread_specific_iterator& operator++() { - ++my_index; - my_value = nullptr; - return *this; - } - - enumerable_thread_specific_iterator& operator--() { - --my_index; - my_value = nullptr; - return *this; - } - - //! Post increment - enumerable_thread_specific_iterator operator++(int) { - enumerable_thread_specific_iterator result = *this; - ++my_index; - my_value = nullptr; - return result; - } - - //! 
Post decrement - enumerable_thread_specific_iterator operator--(int) { - enumerable_thread_specific_iterator result = *this; - --my_index; - my_value = nullptr; - return result; - } -}; - -template<typename Container, typename T, typename U> -bool operator==( const enumerable_thread_specific_iterator<Container, T>& i, - const enumerable_thread_specific_iterator<Container, U>& j ) { - return i.my_index == j.my_index && i.my_container == j.my_container; -} - -template<typename Container, typename T, typename U> -bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return !(i==j); -} - -template<typename Container, typename T, typename U> -bool operator<( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return i.my_index<j.my_index; -} - -template<typename Container, typename T, typename U> -bool operator>( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return j<i; -} - -template<typename Container, typename T, typename U> -bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return !(i<j); -} - -template<typename Container, typename T, typename U> -bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return !(j<i); -} - -template<typename Container, typename T, typename U> -std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i, - const enumerable_thread_specific_iterator<Container,U>& j ) { - return i.my_index-j.my_index; -} - -template<typename SegmentedContainer, typename Value > -class segmented_iterator -{ - template<typename C, typename T, typename U> - friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); - - template<typename C, typename T, typename U> - friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); - - template<typename C, typename U> - friend class segmented_iterator; - -public: - segmented_iterator() {my_segcont = nullptr;} - - segmented_iterator( const SegmentedContainer& _segmented_container ) : - my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)), - outer_iter(my_segcont->end()) { } - - ~segmented_iterator() {} - - using InnerContainer = typename SegmentedContainer::value_type; - using inner_iterator = typename InnerContainer::iterator; - using outer_iterator = typename SegmentedContainer::iterator; - - // STL support - // TODO: inherit all types from segmented container? - using difference_type = std::ptrdiff_t; - using value_type = Value; - using size_type = typename SegmentedContainer::size_type; - using pointer = Value*; - using reference = Value&; - using iterator_category = std::input_iterator_tag; - - // Copy Constructor - template<typename U> - segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) : - my_segcont(other.my_segcont), - outer_iter(other.outer_iter), - // can we assign a default-constructed iterator to inner if we're at the end? 
- inner_iter(other.inner_iter) - {} - - // assignment - template<typename U> - segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) { - my_segcont = other.my_segcont; - outer_iter = other.outer_iter; - if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter; - return *this; - } - - // allow assignment of outer iterator to segmented iterator. Once it is - // assigned, move forward until a non-empty inner container is found or - // the end of the outer container is reached. - segmented_iterator& operator=(const outer_iterator& new_outer_iter) { - __TBB_ASSERT(my_segcont != nullptr, NULL); - // check that this iterator points to something inside the segmented container - for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) { - if( !outer_iter->empty() ) { - inner_iter = outer_iter->begin(); - break; - } - } - return *this; - } - - // pre-increment - segmented_iterator& operator++() { - advance_me(); - return *this; - } - - // post-increment - segmented_iterator operator++(int) { - segmented_iterator tmp = *this; - operator++(); - return tmp; - } - - bool operator==(const outer_iterator& other_outer) const { - __TBB_ASSERT(my_segcont != nullptr, NULL); - return (outer_iter == other_outer && - (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin())); - } - - bool operator!=(const outer_iterator& other_outer) const { - return !operator==(other_outer); - - } - - // (i)* RHS - reference operator*() const { - __TBB_ASSERT(my_segcont != nullptr, NULL); - __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container"); - __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // should never happen - return *inner_iter; - } - - // i-> - pointer operator->() const { return &operator*();} - -private: - SegmentedContainer* my_segcont; - outer_iterator outer_iter; - inner_iterator inner_iter; - - void advance_me() { - __TBB_ASSERT(my_segcont != nullptr, NULL); - __TBB_ASSERT(outer_iter != my_segcont->end(), NULL); // not true if there are no inner containers - __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // not true if the inner containers are all empty. - ++inner_iter; - while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) { - inner_iter = outer_iter->begin(); - } - } -}; // segmented_iterator - -template<typename SegmentedContainer, typename T, typename U> -bool operator==( const segmented_iterator<SegmentedContainer,T>& i, - const segmented_iterator<SegmentedContainer,U>& j ) { - if(i.my_segcont != j.my_segcont) return false; - if(i.my_segcont == nullptr) return true; - if(i.outer_iter != j.outer_iter) return false; - if(i.outer_iter == i.my_segcont->end()) return true; - return i.inner_iter == j.inner_iter; -} - -// != -template<typename SegmentedContainer, typename T, typename U> -bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, - const segmented_iterator<SegmentedContainer,U>& j ) { - return !(i==j); -} - -template<typename T> -struct construct_by_default: no_assign { - void construct(void*where) {new(where) T();} // C++ note: the () in T() ensure zero initialization. 
- construct_by_default( int ) {} -}; - -template<typename T> -struct construct_by_exemplar: no_assign { - const T exemplar; - void construct(void*where) {new(where) T(exemplar);} - construct_by_exemplar( const T& t ) : exemplar(t) {} - construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {} -}; - -template<typename T, typename Finit> -struct construct_by_finit: no_assign { - Finit f; - void construct(void* where) {new(where) T(f());} - construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {} -}; - -template<typename T, typename... P> -struct construct_by_args: no_assign { - stored_pack<P...> pack; - void construct(void* where) { - call( [where](const typename std::decay<P>::type&... args ){ - new(where) T(args...); - }, pack ); - } - construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {} -}; - -// storage for initialization function pointer -// TODO: consider removing the template parameter T here and in callback_leaf -class callback_base { -public: - // Clone *this - virtual callback_base* clone() const = 0; - // Destruct and free *this - virtual void destroy() = 0; - // Need virtual destructor to satisfy GCC compiler warning - virtual ~callback_base() { } - // Construct T at where - virtual void construct(void* where) = 0; -}; - -template <typename Constructor> -class callback_leaf: public callback_base, Constructor { - template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {} - // TODO: make the construction/destruction consistent (use allocator.construct/destroy) - using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>; - - callback_base* clone() const override { - return make(*this); - } - - void destroy() override { - my_allocator_type alloc; - tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this); - tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1); - } - - void construct(void* where) override { - Constructor::construct(where); - } - -public: - template<typename... P> - static callback_base* make( P&& ... params ) { - void* where = my_allocator_type().allocate(1); - return new(where) callback_leaf( std::forward<P>(params)... ); - } -}; - -//! Template for recording construction of objects in table -/** All maintenance of the space will be done explicitly on push_back, - and all thread local copies must be destroyed before the concurrent - vector is deleted. - - The flag is_built is initialized to false. When the local is - successfully-constructed, set the flag to true or call value_committed(). - If the constructor throws, the flag will be false. -*/ -template<typename U> -struct ets_element { - detail::aligned_space<U> my_space; - bool is_built; - ets_element() { is_built = false; } // not currently-built - U* value() { return my_space.begin(); } - U* value_committed() { is_built = true; return my_space.begin(); } - ~ets_element() { - if(is_built) { - my_space.begin()->~U(); - is_built = false; - } - } -}; - -// A predicate that can be used for a compile-time compatibility check of ETS instances -// Ideally, it should have been declared inside the ETS class, but unfortunately -// in that case VS2013 does not enable the variadic constructor. 
-template<typename T, typename ETS> struct is_compatible_ets : std::false_type {}; -template<typename T, typename U, typename A, ets_key_usage_type C> -struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {}; - -// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression -template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()()); -template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>; - -//! The enumerable_thread_specific container -/** enumerable_thread_specific has the following properties: - - thread-local copies are lazily created, with default, exemplar or function initialization. - - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant. - - the contained objects need not have operator=() defined if combine is not used. - - enumerable_thread_specific containers may be copy-constructed or assigned. - - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed. - - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods - -@par Segmented iterator - When the thread-local objects are containers with input_iterators defined, a segmented iterator may - be used to iterate over all the elements of all thread-local copies. - -@par combine and combine_each - - Both methods are defined for enumerable_thread_specific. - - combine() requires the type T have operator=() defined. - - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.) - - Both are evaluated in serial context (the methods are assumed to be non-benign.) - -@ingroup containers */ -template <typename T, typename Allocator=cache_aligned_allocator<T>, - ets_key_usage_type ETS_key_type=ets_no_key > -class enumerable_thread_specific: ets_base<ETS_key_type> { - - template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific; - - using padded_element = padded<ets_element<T>>; - - //! 
A generic range, used to create range objects from the iterators - template<typename I> - class generic_range_type: public blocked_range<I> { - public: - using value_type = T; - using reference = T&; - using const_reference = const T&; - using iterator = I; - using difference_type = std::ptrdiff_t; - - generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} - template<typename U> - generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} - generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {} - }; - - using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; - - using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>; - using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >; - - callback_base *my_construct_callback; - - internal_collection_type my_locals; - - // TODO: consider unifying the callback mechanism for all create_local* methods below - // (likely non-compatible and requires interface version increase) - void* create_local() override { - padded_element& lref = *my_locals.grow_by(1); - my_construct_callback->construct(lref.value()); - return lref.value_committed(); - } - - static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) { - enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); - padded_element& lref = *ets.my_locals.grow_by(1); - new(lref.value()) T(*static_cast<T*>(p)); - return lref.value_committed(); - } - - static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) { - enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); - padded_element& lref = *ets.my_locals.grow_by(1); - new(lref.value()) T(std::move(*static_cast<T*>(p))); - return lref.value_committed(); - } - - using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>; - - // _size is in bytes - void* create_array(std::size_t _size) override { - std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); - return array_allocator_type().allocate(nelements); - } - - void free_array( void* _ptr, std::size_t _size) override { - std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); - array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements); - } - -public: - - //! Basic types - using value_type = T; - using allocator_type = Allocator; - using size_type = typename internal_collection_type::size_type; - using difference_type = typename internal_collection_type::difference_type; - using reference = value_type&; - using const_reference = const value_type&; - - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - - // Iterator types - using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>; - using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>; - - // Parallel range types - using range_type = generic_range_type<iterator>; - using const_range_type = generic_range_type<const_iterator>; - - //! Default constructor. Each local instance of T is default constructed. - enumerable_thread_specific() : my_construct_callback( - callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0) - ){} - - //! Constructor with initializer functor. 
Each local instance of T is constructed by T(finit()). - template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type> - explicit enumerable_thread_specific( Finit finit ) : my_construct_callback( - callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) ) - ){} - - //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar. - explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback( - callback_leaf<construct_by_exemplar<T> >::make( exemplar ) - ){} - - explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback( - callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) ) - ){} - - //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...) - template <typename P1, typename... P, - typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value - && !is_compatible_ets<T, typename std::decay<P1>::type>::value - && !std::is_same<T, typename std::decay<P1>::type>::value - >::type> - enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback( - callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... ) - ){} - - //! Destructor - ~enumerable_thread_specific() { - if(my_construct_callback) my_construct_callback->destroy(); - // Deallocate the hash table before overridden free_array() becomes inaccessible - this->ets_base<ETS_key_type>::table_clear(); - } - - //! returns reference to local, discarding exists - reference local() { - bool exists; - return local(exists); - } - - //! Returns reference to calling thread's local copy, creating one if necessary - reference local(bool& exists) { - void* ptr = this->table_lookup(exists); - return *(T*)ptr; - } - - //! Get the number of local copies - size_type size() const { return my_locals.size(); } - - //! true if there have been no local copies created - bool empty() const { return my_locals.empty(); } - - //! begin iterator - iterator begin() { return iterator( my_locals, 0 ); } - //! end iterator - iterator end() { return iterator(my_locals, my_locals.size() ); } - - //! begin const iterator - const_iterator begin() const { return const_iterator(my_locals, 0); } - - //! end const iterator - const_iterator end() const { return const_iterator(my_locals, my_locals.size()); } - - //! Get range for parallel algorithms - range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } - - //! Get const range for parallel algorithms - const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); } - - //! Destroys local copies - void clear() { - my_locals.clear(); - this->table_clear(); - // callback is not destroyed - } - -private: - template<typename A2, ets_key_usage_type C2> - void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) { - // this tests is_compatible_ets - static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); - // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception. 
- my_construct_callback = other.my_construct_callback->clone(); - __TBB_ASSERT(my_locals.size()==0,NULL); - my_locals.reserve(other.size()); - this->table_elementwise_copy( other, create_local_by_copy ); - } - - void internal_swap(enumerable_thread_specific& other) { - using std::swap; - __TBB_ASSERT( this!=&other, NULL ); - swap(my_construct_callback, other.my_construct_callback); - // concurrent_vector::swap() preserves storage space, - // so addresses to the vector kept in ETS hash table remain valid. - swap(my_locals, other.my_locals); - this->ets_base<ETS_key_type>::table_swap(other); - } - - template<typename A2, ets_key_usage_type C2> - void internal_move(enumerable_thread_specific<T, A2, C2>&& other) { - static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); - my_construct_callback = other.my_construct_callback; - other.my_construct_callback = nullptr; - __TBB_ASSERT(my_locals.size()==0,NULL); - my_locals.reserve(other.size()); - this->table_elementwise_copy( other, create_local_by_move ); - } - -public: - enumerable_thread_specific( const enumerable_thread_specific& other ) - : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */ - { - internal_copy(other); - } - - template<typename Alloc, ets_key_usage_type Cachetype> - enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) - { - internal_copy(other); - } - - enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() - { - // TODO: use internal_move correctly here - internal_swap(other); - } - - template<typename Alloc, ets_key_usage_type Cachetype> - enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback() - { - internal_move(std::move(other)); - } - - enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) - { - if( this != &other ) { - this->clear(); - my_construct_callback->destroy(); - internal_copy( other ); - } - return *this; - } - - template<typename Alloc, ets_key_usage_type Cachetype> - enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) - { - __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types - this->clear(); - my_construct_callback->destroy(); - internal_copy(other); - return *this; - } - - enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) - { - if( this != &other ) { - // TODO: use internal_move correctly here - internal_swap(other); - } - return *this; - } - - template<typename Alloc, ets_key_usage_type Cachetype> - enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) - { - __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types - this->clear(); - my_construct_callback->destroy(); - internal_move(std::move(other)); - return *this; - } - - // CombineFunc has signature T(T,T) or T(const T&, const T&) - template <typename CombineFunc> - T combine(CombineFunc f_combine) { - if(begin() == end()) { - ets_element<T> location; - my_construct_callback->construct(location.value()); - return *location.value_committed(); - } - const_iterator ci = begin(); - T my_result = *ci; - while(++ci != end()) - my_result = f_combine( my_result, *ci ); - return my_result; - } - - // combine_func_t takes T by value or by [const] reference, and returns nothing - template <typename 
CombineFunc> - void combine_each(CombineFunc f_combine) { - for(iterator ci = begin(); ci != end(); ++ci) { - f_combine( *ci ); - } - } - -}; // enumerable_thread_specific - -template< typename Container > -class flattened2d { - // This intermediate typedef is to address issues with VC7.1 compilers - using conval_type = typename Container::value_type; - -public: - //! Basic types - using size_type = typename conval_type::size_type; - using difference_type = typename conval_type::difference_type; - using allocator_type = typename conval_type::allocator_type; - using value_type = typename conval_type::value_type; - using reference = typename conval_type::reference; - using const_reference = typename conval_type::const_reference; - using pointer = typename conval_type::pointer; - using const_pointer = typename conval_type::const_pointer; - - using iterator = segmented_iterator<Container, value_type>; - using const_iterator = segmented_iterator<Container, const value_type>; - - flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) : - my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { } - - explicit flattened2d( const Container &c ) : - my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { } - - iterator begin() { return iterator(*my_container) = my_begin; } - iterator end() { return iterator(*my_container) = my_end; } - const_iterator begin() const { return const_iterator(*my_container) = my_begin; } - const_iterator end() const { return const_iterator(*my_container) = my_end; } - - size_type size() const { - size_type tot_size = 0; - for(typename Container::const_iterator i = my_begin; i != my_end; ++i) { - tot_size += i->size(); - } - return tot_size; - } - -private: - Container *my_container; - typename Container::const_iterator my_begin; - typename Container::const_iterator my_end; -}; - -template <typename Container> -flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) { - return flattened2d<Container>(c, b, e); -} - -template <typename Container> -flattened2d<Container> flatten2d(const Container &c) { - return flattened2d<Container>(c); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::enumerable_thread_specific; -using detail::d1::flattened2d; -using detail::d1::flatten2d; -// ets enum keys -using detail::d1::ets_key_usage_type; -using detail::d1::ets_key_per_instance; -using detail::d1::ets_no_key; -#if __TBB_RESUMABLE_TASKS -using detail::d1::ets_suspend_aware; -#endif -} // inline namespace v1 - -} // namespace tbb - -#endif // __TBB_enumerable_thread_specific_H - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_enumerable_thread_specific_H +#define __TBB_enumerable_thread_specific_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_template_helpers.h" +#include "detail/_aligned_space.h" + +#include "concurrent_vector.h" +#include "tbb_allocator.h" +#include "cache_aligned_allocator.h" +#include "profiling.h" + +#include <atomic> +#include <thread> +#include <cstring> // memcpy +#include <cstddef> // std::ptrdiff_t + +#include "task.h" // for task::suspend_point + +#if _WIN32 || _WIN64 +#include <windows.h> +#else +#include <pthread.h> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +//! enum for selecting between single key and key-per-instance versions +enum ets_key_usage_type { + ets_key_per_instance + , ets_no_key +#if __TBB_RESUMABLE_TASKS + , ets_suspend_aware +#endif +}; + +// Forward declaration to use in internal classes +template <typename T, typename Allocator, ets_key_usage_type ETS_key_type> +class enumerable_thread_specific; + +template <std::size_t ThreadIDSize> +struct internal_ets_key_selector { + using key_type = std::thread::id; + static key_type current_key() { + return std::this_thread::get_id(); + } +}; + +// Intel Compiler on OSX cannot create atomics objects that instantiated from non-fundamental types +#if __INTEL_COMPILER && __APPLE__ +template<> +struct internal_ets_key_selector<sizeof(std::size_t)> { + using key_type = std::size_t; + static key_type current_key() { + auto id = std::this_thread::get_id(); + return reinterpret_cast<key_type&>(id); + } +}; +#endif + +template <ets_key_usage_type ETS_key_type> +struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {}; + +#if __TBB_RESUMABLE_TASKS +template <> +struct ets_key_selector<ets_suspend_aware> { + using key_type = suspend_point; + static key_type current_key() { + return r1::current_suspend_point(); + } +}; +#endif + +template<ets_key_usage_type ETS_key_type> +class ets_base : detail::no_copy { +protected: + using key_type = typename ets_key_selector<ETS_key_type>::key_type; + +public: + struct slot; + struct array { + array* next; + std::size_t lg_size; + slot& at( std::size_t k ) { + return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k]; + } + std::size_t size() const { return std::size_t(1) << lg_size; } + std::size_t mask() const { return size() - 1; } + std::size_t start( std::size_t h ) const { + return h >> (8 * sizeof(std::size_t) - lg_size); + } + }; + struct slot { + std::atomic<key_type> key; + void* ptr; + bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); } + bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; } + bool claim( key_type k ) { + // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size + key_type expected = key_type(); + return key.compare_exchange_strong(expected, k); + } + }; + +protected: + //! Root of linked list of arrays of decreasing size. + /** nullptr if and only if my_count==0. + Each array in the list is half the size of its predecessor. 
*/ + std::atomic<array*> my_root; + std::atomic<std::size_t> my_count; + + virtual void* create_local() = 0; + virtual void* create_array(std::size_t _size) = 0; // _size in bytes + virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes + + array* allocate( std::size_t lg_size ) { + std::size_t n = std::size_t(1) << lg_size; + array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot))); + a->lg_size = lg_size; + std::memset( a + 1, 0, n * sizeof(slot) ); + return a; + } + void free(array* a) { + std::size_t n = std::size_t(1) << (a->lg_size); + free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) ); + } + + ets_base() : my_root{nullptr}, my_count{0} {} + virtual ~ets_base(); // g++ complains if this is not virtual + + void* table_lookup( bool& exists ); + void table_clear(); + // The following functions are not used in concurrent context, + // so we don't need synchronization and ITT annotations there. + template <ets_key_usage_type E2> + void table_elementwise_copy( const ets_base& other, + void*(*add_element)(ets_base<E2>&, void*) ) { + __TBB_ASSERT(!my_root.load(std::memory_order_relaxed),NULL); + __TBB_ASSERT(!my_count.load(std::memory_order_relaxed),NULL); + if( !other.my_root.load(std::memory_order_relaxed) ) return; + array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size); + my_root.store(root, std::memory_order_relaxed); + root->next = nullptr; + my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + std::size_t mask = root->mask(); + for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) { + for( std::size_t i = 0; i < r->size(); ++i ) { + slot& s1 = r->at(i); + if( !s1.empty() ) { + for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) { + slot& s2 = root->at(j); + if( s2.empty() ) { + s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr); + s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + break; + } + else if( s2.match(s1.key.load(std::memory_order_relaxed)) ) + break; + } + } + } + } + } + void table_swap( ets_base& other ) { + __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); + swap_atomics_relaxed(my_root, other.my_root); + swap_atomics_relaxed(my_count, other.my_count); + } +}; + +template<ets_key_usage_type ETS_key_type> +ets_base<ETS_key_type>::~ets_base() { + __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr); +} + +template<ets_key_usage_type ETS_key_type> +void ets_base<ETS_key_type>::table_clear() { + while ( array* r = my_root.load(std::memory_order_relaxed) ) { + my_root.store(r->next, std::memory_order_relaxed); + free(r); + } + my_count.store(0, std::memory_order_relaxed); +} + +template<ets_key_usage_type ETS_key_type> +void* ets_base<ETS_key_type>::table_lookup( bool& exists ) { + const key_type k = ets_key_selector<ETS_key_type>::current_key(); + + __TBB_ASSERT(k != key_type(),NULL); + void* found; + std::size_t h = std::hash<key_type>{}(k); + for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) { + call_itt_notify(acquired,r); + std::size_t mask=r->mask(); + for(std::size_t i = r->start(h); ;i=(i+1)&mask) { + slot& s = r->at(i); + if( s.empty() ) break; + if( s.match(k) ) { + if( r == my_root.load(std::memory_order_acquire) ) { + // Success at top level + exists = true; + return s.ptr; + } else { + // Success at some other level. 
Need to insert at top level. + exists = true; + found = s.ptr; + goto insert; + } + } + } + } + // Key does not yet exist. The density of slots in the table does not exceed 0.5, + // for if this will occur a new table is allocated with double the current table + // size, which is swapped in as the new root table. So an empty slot is guaranteed. + exists = false; + found = create_local(); + { + std::size_t c = ++my_count; + array* r = my_root.load(std::memory_order_acquire); + call_itt_notify(acquired,r); + if( !r || c > r->size()/2 ) { + std::size_t s = r ? r->lg_size : 2; + while( c > std::size_t(1)<<(s-1) ) ++s; + array* a = allocate(s); + for(;;) { + a->next = r; + call_itt_notify(releasing,a); + array* new_r = r; + if( my_root.compare_exchange_strong(new_r, a) ) break; + call_itt_notify(acquired, new_r); + __TBB_ASSERT(new_r != nullptr, nullptr); + if( new_r->lg_size >= s ) { + // Another thread inserted an equal or bigger array, so our array is superfluous. + free(a); + break; + } + r = new_r; + } + } + } + insert: + // Whether a slot has been found in an older table, or if it has been inserted at this level, + // it has already been accounted for in the total. Guaranteed to be room for it, and it is + // not present, so search for empty slot and use it. + array* ir = my_root.load(std::memory_order_acquire); + call_itt_notify(acquired, ir); + std::size_t mask = ir->mask(); + for(std::size_t i = ir->start(h);; i = (i+1)&mask) { + slot& s = ir->at(i); + if( s.empty() ) { + if( s.claim(k) ) { + s.ptr = found; + return found; + } + } + } +} + +//! Specialization that exploits native TLS +template <> +class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> { + using super = ets_base<ets_no_key>; +#if _WIN32||_WIN64 +#if __TBB_WIN8UI_SUPPORT + using tls_key_t = DWORD; + void create_key() { my_key = FlsAlloc(NULL); } + void destroy_key() { FlsFree(my_key); } + void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); } + void* get_tls() { return (void *)FlsGetValue(my_key); } +#else + using tls_key_t = DWORD; + void create_key() { my_key = TlsAlloc(); } + void destroy_key() { TlsFree(my_key); } + void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); } + void* get_tls() { return (void *)TlsGetValue(my_key); } +#endif +#else + using tls_key_t = pthread_key_t; + void create_key() { pthread_key_create(&my_key, NULL); } + void destroy_key() { pthread_key_delete(my_key); } + void set_tls( void * value ) const { pthread_setspecific(my_key, value); } + void* get_tls() const { return pthread_getspecific(my_key); } +#endif + tls_key_t my_key; + virtual void* create_local() override = 0; + virtual void* create_array(std::size_t _size) override = 0; // _size in bytes + virtual void free_array(void* ptr, std::size_t _size) override = 0; // size in bytes +protected: + ets_base() {create_key();} + ~ets_base() {destroy_key();} + void* table_lookup( bool& exists ) { + void* found = get_tls(); + if( found ) { + exists=true; + } else { + found = super::table_lookup(exists); + set_tls(found); + } + return found; + } + void table_clear() { + destroy_key(); + create_key(); + super::table_clear(); + } + void table_swap( ets_base& other ) { + using std::swap; + __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); + swap(my_key, other.my_key); + super::table_swap(other); + } +}; + +//! Random access iterator for traversing the thread local copies. +template< typename Container, typename Value > +class enumerable_thread_specific_iterator +{ + //! 
current position in the concurrent_vector + + Container *my_container; + typename Container::size_type my_index; + mutable Value *my_value; + + template<typename C, typename T, typename U> + friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i, + const enumerable_thread_specific_iterator<C, U>& j ); + + template<typename C, typename T, typename U> + friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i, + const enumerable_thread_specific_iterator<C,U>& j ); + + template<typename C, typename T, typename U> + friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i, + const enumerable_thread_specific_iterator<C,U>& j ); + + template<typename C, typename U> + friend class enumerable_thread_specific_iterator; + +public: + //! STL support + using difference_type = std::ptrdiff_t; + using value_type = Value; + using pointer = Value*; + using reference = Value&; + using iterator_category = std::random_access_iterator_tag; + + enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : + my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {} + + //! Default constructor + enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {} + + template<typename U> + enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) : + my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {} + + enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const { + return enumerable_thread_specific_iterator(*my_container, my_index + offset); + } + + friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) { + return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset); + } + + enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) { + my_index += offset; + my_value = nullptr; + return *this; + } + + enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const { + return enumerable_thread_specific_iterator( *my_container, my_index-offset ); + } + + enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) { + my_index -= offset; + my_value = nullptr; + return *this; + } + + Value& operator*() const { + Value* value = my_value; + if( !value ) { + value = my_value = (*my_container)[my_index].value(); + } + __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" ); + return *value; + } + + Value& operator[]( std::ptrdiff_t k ) const { + return *(*my_container)[my_index + k].value(); + } + + Value* operator->() const {return &operator*();} + + enumerable_thread_specific_iterator& operator++() { + ++my_index; + my_value = nullptr; + return *this; + } + + enumerable_thread_specific_iterator& operator--() { + --my_index; + my_value = nullptr; + return *this; + } + + //! Post increment + enumerable_thread_specific_iterator operator++(int) { + enumerable_thread_specific_iterator result = *this; + ++my_index; + my_value = nullptr; + return result; + } + + //! 
Post decrement + enumerable_thread_specific_iterator operator--(int) { + enumerable_thread_specific_iterator result = *this; + --my_index; + my_value = nullptr; + return result; + } +}; + +template<typename Container, typename T, typename U> +bool operator==( const enumerable_thread_specific_iterator<Container, T>& i, + const enumerable_thread_specific_iterator<Container, U>& j ) { + return i.my_index == j.my_index && i.my_container == j.my_container; +} + +template<typename Container, typename T, typename U> +bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(i==j); +} + +template<typename Container, typename T, typename U> +bool operator<( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return i.my_index<j.my_index; +} + +template<typename Container, typename T, typename U> +bool operator>( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return j<i; +} + +template<typename Container, typename T, typename U> +bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(i<j); +} + +template<typename Container, typename T, typename U> +bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(j<i); +} + +template<typename Container, typename T, typename U> +std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return i.my_index-j.my_index; +} + +template<typename SegmentedContainer, typename Value > +class segmented_iterator +{ + template<typename C, typename T, typename U> + friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); + + template<typename C, typename T, typename U> + friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); + + template<typename C, typename U> + friend class segmented_iterator; + +public: + segmented_iterator() {my_segcont = nullptr;} + + segmented_iterator( const SegmentedContainer& _segmented_container ) : + my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)), + outer_iter(my_segcont->end()) { } + + ~segmented_iterator() {} + + using InnerContainer = typename SegmentedContainer::value_type; + using inner_iterator = typename InnerContainer::iterator; + using outer_iterator = typename SegmentedContainer::iterator; + + // STL support + // TODO: inherit all types from segmented container? + using difference_type = std::ptrdiff_t; + using value_type = Value; + using size_type = typename SegmentedContainer::size_type; + using pointer = Value*; + using reference = Value&; + using iterator_category = std::input_iterator_tag; + + // Copy Constructor + template<typename U> + segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) : + my_segcont(other.my_segcont), + outer_iter(other.outer_iter), + // can we assign a default-constructed iterator to inner if we're at the end? 
+ inner_iter(other.inner_iter) + {} + + // assignment + template<typename U> + segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) { + my_segcont = other.my_segcont; + outer_iter = other.outer_iter; + if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter; + return *this; + } + + // allow assignment of outer iterator to segmented iterator. Once it is + // assigned, move forward until a non-empty inner container is found or + // the end of the outer container is reached. + segmented_iterator& operator=(const outer_iterator& new_outer_iter) { + __TBB_ASSERT(my_segcont != nullptr, NULL); + // check that this iterator points to something inside the segmented container + for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) { + if( !outer_iter->empty() ) { + inner_iter = outer_iter->begin(); + break; + } + } + return *this; + } + + // pre-increment + segmented_iterator& operator++() { + advance_me(); + return *this; + } + + // post-increment + segmented_iterator operator++(int) { + segmented_iterator tmp = *this; + operator++(); + return tmp; + } + + bool operator==(const outer_iterator& other_outer) const { + __TBB_ASSERT(my_segcont != nullptr, NULL); + return (outer_iter == other_outer && + (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin())); + } + + bool operator!=(const outer_iterator& other_outer) const { + return !operator==(other_outer); + + } + + // (i)* RHS + reference operator*() const { + __TBB_ASSERT(my_segcont != nullptr, NULL); + __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container"); + __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // should never happen + return *inner_iter; + } + + // i-> + pointer operator->() const { return &operator*();} + +private: + SegmentedContainer* my_segcont; + outer_iterator outer_iter; + inner_iterator inner_iter; + + void advance_me() { + __TBB_ASSERT(my_segcont != nullptr, NULL); + __TBB_ASSERT(outer_iter != my_segcont->end(), NULL); // not true if there are no inner containers + __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // not true if the inner containers are all empty. + ++inner_iter; + while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) { + inner_iter = outer_iter->begin(); + } + } +}; // segmented_iterator + +template<typename SegmentedContainer, typename T, typename U> +bool operator==( const segmented_iterator<SegmentedContainer,T>& i, + const segmented_iterator<SegmentedContainer,U>& j ) { + if(i.my_segcont != j.my_segcont) return false; + if(i.my_segcont == nullptr) return true; + if(i.outer_iter != j.outer_iter) return false; + if(i.outer_iter == i.my_segcont->end()) return true; + return i.inner_iter == j.inner_iter; +} + +// != +template<typename SegmentedContainer, typename T, typename U> +bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, + const segmented_iterator<SegmentedContainer,U>& j ) { + return !(i==j); +} + +template<typename T> +struct construct_by_default: no_assign { + void construct(void*where) {new(where) T();} // C++ note: the () in T() ensure zero initialization. 
+ construct_by_default( int ) {} +}; + +template<typename T> +struct construct_by_exemplar: no_assign { + const T exemplar; + void construct(void*where) {new(where) T(exemplar);} + construct_by_exemplar( const T& t ) : exemplar(t) {} + construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {} +}; + +template<typename T, typename Finit> +struct construct_by_finit: no_assign { + Finit f; + void construct(void* where) {new(where) T(f());} + construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {} +}; + +template<typename T, typename... P> +struct construct_by_args: no_assign { + stored_pack<P...> pack; + void construct(void* where) { + call( [where](const typename std::decay<P>::type&... args ){ + new(where) T(args...); + }, pack ); + } + construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {} +}; + +// storage for initialization function pointer +// TODO: consider removing the template parameter T here and in callback_leaf +class callback_base { +public: + // Clone *this + virtual callback_base* clone() const = 0; + // Destruct and free *this + virtual void destroy() = 0; + // Need virtual destructor to satisfy GCC compiler warning + virtual ~callback_base() { } + // Construct T at where + virtual void construct(void* where) = 0; +}; + +template <typename Constructor> +class callback_leaf: public callback_base, Constructor { + template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {} + // TODO: make the construction/destruction consistent (use allocator.construct/destroy) + using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>; + + callback_base* clone() const override { + return make(*this); + } + + void destroy() override { + my_allocator_type alloc; + tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this); + tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1); + } + + void construct(void* where) override { + Constructor::construct(where); + } + +public: + template<typename... P> + static callback_base* make( P&& ... params ) { + void* where = my_allocator_type().allocate(1); + return new(where) callback_leaf( std::forward<P>(params)... ); + } +}; + +//! Template for recording construction of objects in table +/** All maintenance of the space will be done explicitly on push_back, + and all thread local copies must be destroyed before the concurrent + vector is deleted. + + The flag is_built is initialized to false. When the local is + successfully-constructed, set the flag to true or call value_committed(). + If the constructor throws, the flag will be false. +*/ +template<typename U> +struct ets_element { + detail::aligned_space<U> my_space; + bool is_built; + ets_element() { is_built = false; } // not currently-built + U* value() { return my_space.begin(); } + U* value_committed() { is_built = true; return my_space.begin(); } + ~ets_element() { + if(is_built) { + my_space.begin()->~U(); + is_built = false; + } + } +}; + +// A predicate that can be used for a compile-time compatibility check of ETS instances +// Ideally, it should have been declared inside the ETS class, but unfortunately +// in that case VS2013 does not enable the variadic constructor. 
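As a brief illustrative aside (not part of this header or of the commit), the construct_by_default / construct_by_exemplar / construct_by_finit / construct_by_args helpers above correspond one-to-one to the public constructors of enumerable_thread_specific declared further below. A minimal sketch, assuming the usual oneapi/tbb include path; all variable and function names here are invented for the example:

#include "oneapi/tbb/enumerable_thread_specific.h"
#include <vector>

void construction_flavors() {
    // construct_by_default: each thread's copy is value-initialized (int() == 0).
    tbb::enumerable_thread_specific<int> by_default;

    // construct_by_exemplar: each thread's copy is copy-constructed from the exemplar.
    std::vector<int> exemplar(4, -1);
    tbb::enumerable_thread_specific<std::vector<int>> by_exemplar(exemplar);

    // construct_by_finit: each thread's copy is built from the functor's return value.
    tbb::enumerable_thread_specific<int> by_finit([] { return 42; });

    // construct_by_args: each thread's copy is built as std::vector<int>(8u, 0).
    tbb::enumerable_thread_specific<std::vector<int>> by_args(8u, 0);

    // local() lazily creates the calling thread's copy on first use and
    // returns the same copy on every later call from that thread.
    by_default.local() += 1;
}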
+template<typename T, typename ETS> struct is_compatible_ets : std::false_type {}; +template<typename T, typename U, typename A, ets_key_usage_type C> +struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {}; + +// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression +template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()()); +template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>; + +//! The enumerable_thread_specific container +/** enumerable_thread_specific has the following properties: + - thread-local copies are lazily created, with default, exemplar or function initialization. + - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant. + - the contained objects need not have operator=() defined if combine is not used. + - enumerable_thread_specific containers may be copy-constructed or assigned. + - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed. + - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods + +@par Segmented iterator + When the thread-local objects are containers with input_iterators defined, a segmented iterator may + be used to iterate over all the elements of all thread-local copies. + +@par combine and combine_each + - Both methods are defined for enumerable_thread_specific. + - combine() requires the type T have operator=() defined. + - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.) + - Both are evaluated in serial context (the methods are assumed to be non-benign.) + +@ingroup containers */ +template <typename T, typename Allocator=cache_aligned_allocator<T>, + ets_key_usage_type ETS_key_type=ets_no_key > +class enumerable_thread_specific: ets_base<ETS_key_type> { + + template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific; + + using padded_element = padded<ets_element<T>>; + + //! 
A generic range, used to create range objects from the iterators + template<typename I> + class generic_range_type: public blocked_range<I> { + public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + using iterator = I; + using difference_type = std::ptrdiff_t; + + generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} + template<typename U> + generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} + generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {} + }; + + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + + using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>; + using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >; + + callback_base *my_construct_callback; + + internal_collection_type my_locals; + + // TODO: consider unifying the callback mechanism for all create_local* methods below + // (likely non-compatible and requires interface version increase) + void* create_local() override { + padded_element& lref = *my_locals.grow_by(1); + my_construct_callback->construct(lref.value()); + return lref.value_committed(); + } + + static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) { + enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); + padded_element& lref = *ets.my_locals.grow_by(1); + new(lref.value()) T(*static_cast<T*>(p)); + return lref.value_committed(); + } + + static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) { + enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); + padded_element& lref = *ets.my_locals.grow_by(1); + new(lref.value()) T(std::move(*static_cast<T*>(p))); + return lref.value_committed(); + } + + using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>; + + // _size is in bytes + void* create_array(std::size_t _size) override { + std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); + return array_allocator_type().allocate(nelements); + } + + void free_array( void* _ptr, std::size_t _size) override { + std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); + array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements); + } + +public: + + //! Basic types + using value_type = T; + using allocator_type = Allocator; + using size_type = typename internal_collection_type::size_type; + using difference_type = typename internal_collection_type::difference_type; + using reference = value_type&; + using const_reference = const value_type&; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + // Iterator types + using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>; + using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>; + + // Parallel range types + using range_type = generic_range_type<iterator>; + using const_range_type = generic_range_type<const_iterator>; + + //! Default constructor. Each local instance of T is default constructed. + enumerable_thread_specific() : my_construct_callback( + callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0) + ){} + + //! Constructor with initializer functor. 
Each local instance of T is constructed by T(finit()). + template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type> + explicit enumerable_thread_specific( Finit finit ) : my_construct_callback( + callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) ) + ){} + + //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar. + explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback( + callback_leaf<construct_by_exemplar<T> >::make( exemplar ) + ){} + + explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback( + callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) ) + ){} + + //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...) + template <typename P1, typename... P, + typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value + && !is_compatible_ets<T, typename std::decay<P1>::type>::value + && !std::is_same<T, typename std::decay<P1>::type>::value + >::type> + enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback( + callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... ) + ){} + + //! Destructor + ~enumerable_thread_specific() { + if(my_construct_callback) my_construct_callback->destroy(); + // Deallocate the hash table before overridden free_array() becomes inaccessible + this->ets_base<ETS_key_type>::table_clear(); + } + + //! returns reference to local, discarding exists + reference local() { + bool exists; + return local(exists); + } + + //! Returns reference to calling thread's local copy, creating one if necessary + reference local(bool& exists) { + void* ptr = this->table_lookup(exists); + return *(T*)ptr; + } + + //! Get the number of local copies + size_type size() const { return my_locals.size(); } + + //! true if there have been no local copies created + bool empty() const { return my_locals.empty(); } + + //! begin iterator + iterator begin() { return iterator( my_locals, 0 ); } + //! end iterator + iterator end() { return iterator(my_locals, my_locals.size() ); } + + //! begin const iterator + const_iterator begin() const { return const_iterator(my_locals, 0); } + + //! end const iterator + const_iterator end() const { return const_iterator(my_locals, my_locals.size()); } + + //! Get range for parallel algorithms + range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } + + //! Get const range for parallel algorithms + const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); } + + //! Destroys local copies + void clear() { + my_locals.clear(); + this->table_clear(); + // callback is not destroyed + } + +private: + template<typename A2, ets_key_usage_type C2> + void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) { + // this tests is_compatible_ets + static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); + // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception. 
+ my_construct_callback = other.my_construct_callback->clone(); + __TBB_ASSERT(my_locals.size()==0,NULL); + my_locals.reserve(other.size()); + this->table_elementwise_copy( other, create_local_by_copy ); + } + + void internal_swap(enumerable_thread_specific& other) { + using std::swap; + __TBB_ASSERT( this!=&other, NULL ); + swap(my_construct_callback, other.my_construct_callback); + // concurrent_vector::swap() preserves storage space, + // so addresses to the vector kept in ETS hash table remain valid. + swap(my_locals, other.my_locals); + this->ets_base<ETS_key_type>::table_swap(other); + } + + template<typename A2, ets_key_usage_type C2> + void internal_move(enumerable_thread_specific<T, A2, C2>&& other) { + static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); + my_construct_callback = other.my_construct_callback; + other.my_construct_callback = nullptr; + __TBB_ASSERT(my_locals.size()==0,NULL); + my_locals.reserve(other.size()); + this->table_elementwise_copy( other, create_local_by_move ); + } + +public: + enumerable_thread_specific( const enumerable_thread_specific& other ) + : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */ + { + internal_copy(other); + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) + { + internal_copy(other); + } + + enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() + { + // TODO: use internal_move correctly here + internal_swap(other); + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback() + { + internal_move(std::move(other)); + } + + enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) + { + if( this != &other ) { + this->clear(); + my_construct_callback->destroy(); + internal_copy( other ); + } + return *this; + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) + { + __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types + this->clear(); + my_construct_callback->destroy(); + internal_copy(other); + return *this; + } + + enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) + { + if( this != &other ) { + // TODO: use internal_move correctly here + internal_swap(other); + } + return *this; + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) + { + __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types + this->clear(); + my_construct_callback->destroy(); + internal_move(std::move(other)); + return *this; + } + + // CombineFunc has signature T(T,T) or T(const T&, const T&) + template <typename CombineFunc> + T combine(CombineFunc f_combine) { + if(begin() == end()) { + ets_element<T> location; + my_construct_callback->construct(location.value()); + return *location.value_committed(); + } + const_iterator ci = begin(); + T my_result = *ci; + while(++ci != end()) + my_result = f_combine( my_result, *ci ); + return my_result; + } + + // combine_func_t takes T by value or by [const] reference, and returns nothing + template <typename 
CombineFunc> + void combine_each(CombineFunc f_combine) { + for(iterator ci = begin(); ci != end(); ++ci) { + f_combine( *ci ); + } + } + +}; // enumerable_thread_specific + +template< typename Container > +class flattened2d { + // This intermediate typedef is to address issues with VC7.1 compilers + using conval_type = typename Container::value_type; + +public: + //! Basic types + using size_type = typename conval_type::size_type; + using difference_type = typename conval_type::difference_type; + using allocator_type = typename conval_type::allocator_type; + using value_type = typename conval_type::value_type; + using reference = typename conval_type::reference; + using const_reference = typename conval_type::const_reference; + using pointer = typename conval_type::pointer; + using const_pointer = typename conval_type::const_pointer; + + using iterator = segmented_iterator<Container, value_type>; + using const_iterator = segmented_iterator<Container, const value_type>; + + flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) : + my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { } + + explicit flattened2d( const Container &c ) : + my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { } + + iterator begin() { return iterator(*my_container) = my_begin; } + iterator end() { return iterator(*my_container) = my_end; } + const_iterator begin() const { return const_iterator(*my_container) = my_begin; } + const_iterator end() const { return const_iterator(*my_container) = my_end; } + + size_type size() const { + size_type tot_size = 0; + for(typename Container::const_iterator i = my_begin; i != my_end; ++i) { + tot_size += i->size(); + } + return tot_size; + } + +private: + Container *my_container; + typename Container::const_iterator my_begin; + typename Container::const_iterator my_end; +}; + +template <typename Container> +flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) { + return flattened2d<Container>(c, b, e); +} + +template <typename Container> +flattened2d<Container> flatten2d(const Container &c) { + return flattened2d<Container>(c); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::enumerable_thread_specific; +using detail::d1::flattened2d; +using detail::d1::flatten2d; +// ets enum keys +using detail::d1::ets_key_usage_type; +using detail::d1::ets_key_per_instance; +using detail::d1::ets_no_key; +#if __TBB_RESUMABLE_TASKS +using detail::d1::ets_suspend_aware; +#endif +} // inline namespace v1 + +} // namespace tbb + +#endif // __TBB_enumerable_thread_specific_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h b/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h index cc2cc7b605..45dc7191fc 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h +++ b/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h @@ -1,3221 +1,3221 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_flow_graph_H -#define __TBB_flow_graph_H - -#include <atomic> -#include <memory> -#include <type_traits> - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "spin_mutex.h" -#include "null_mutex.h" -#include "spin_rw_mutex.h" -#include "null_rw_mutex.h" -#include "detail/_pipeline_filters.h" -#include "detail/_task.h" -#include "detail/_small_object_pool.h" -#include "cache_aligned_allocator.h" -#include "detail/_exception.h" -#include "detail/_template_helpers.h" -#include "detail/_aggregator.h" -#include "detail/_allocator_traits.h" -#include "profiling.h" -#include "task_arena.h" - -#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) - #if __INTEL_COMPILER - // Disabled warning "routine is both inline and noinline" - #pragma warning (push) - #pragma warning( disable: 2196 ) - #endif - #define __TBB_NOINLINE_SYM __attribute__((noinline)) -#else - #define __TBB_NOINLINE_SYM -#endif - -#include <tuple> -#include <list> -#include <queue> - -/** @file - \brief The graph related classes and functions - - There are some applications that best express dependencies as messages - passed between nodes in a graph. These messages may contain data or - simply act as signals that a predecessors has completed. The graph - class and its associated node classes can be used to express such - applications. -*/ - -namespace tbb { -namespace detail { - -namespace d1 { - -//! An enumeration the provides the two most common concurrency levels: unlimited and serial -enum concurrency { unlimited = 0, serial = 1 }; - -//! A generic null type -struct null_type {}; - -//! An empty class used for messages that mean "I'm done" -class continue_msg {}; - -//! Forward declaration section -template< typename T > class sender; -template< typename T > class receiver; -class continue_receiver; - -template< typename T, typename U > class limiter_node; // needed for resetting decrementer - -template<typename T, typename M> class successor_cache; -template<typename T, typename M> class broadcast_cache; -template<typename T, typename M> class round_robin_cache; -template<typename T, typename M> class predecessor_cache; -template<typename T, typename M> class reservable_predecessor_cache; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -namespace order { -struct following; -struct preceding; -} -template<typename Order, typename... Args> struct node_set; -#endif - - -} // namespace d1 -} // namespace detail -} // namespace tbb - -//! The graph class -#include "detail/_flow_graph_impl.h" - -namespace tbb { -namespace detail { -namespace d1 { - -static inline std::pair<graph_task*, graph_task*> order_tasks(graph_task* first, graph_task* second) { - if (second->priority > first->priority) - return std::make_pair(second, first); - return std::make_pair(first, second); -} - -// submit task if necessary. Returns the non-enqueued task if there is one. -static inline graph_task* combine_tasks(graph& g, graph_task* left, graph_task* right) { - // if no RHS task, don't change left. - if (right == NULL) return left; - // right != NULL - if (left == NULL) return right; - if (left == SUCCESSFULLY_ENQUEUED) return right; - // left contains a task - if (right != SUCCESSFULLY_ENQUEUED) { - // both are valid tasks - auto tasks_pair = order_tasks(left, right); - spawn_in_graph_arena(g, *tasks_pair.first); - return tasks_pair.second; - } - return left; -} - -//! 
Pure virtual template class that defines a sender of messages of type T -template< typename T > -class sender { -public: - virtual ~sender() {} - - //! Request an item from the sender - virtual bool try_get( T & ) { return false; } - - //! Reserves an item in the sender - virtual bool try_reserve( T & ) { return false; } - - //! Releases the reserved item - virtual bool try_release( ) { return false; } - - //! Consumes the reserved item - virtual bool try_consume( ) { return false; } - -protected: - //! The output type of this sender - typedef T output_type; - - //! The successor type for this node - typedef receiver<T> successor_type; - - //! Add a new successor to this node - virtual bool register_successor( successor_type &r ) = 0; - - //! Removes a successor from this node - virtual bool remove_successor( successor_type &r ) = 0; - - template<typename C> - friend bool register_successor(sender<C>& s, receiver<C>& r); - - template<typename C> - friend bool remove_successor (sender<C>& s, receiver<C>& r); -}; // class sender<T> - -template<typename C> -bool register_successor(sender<C>& s, receiver<C>& r) { - return s.register_successor(r); -} - -template<typename C> -bool remove_successor(sender<C>& s, receiver<C>& r) { - return s.remove_successor(r); -} - -//! Pure virtual template class that defines a receiver of messages of type T -template< typename T > -class receiver { -public: - //! Destructor - virtual ~receiver() {} - - //! Put an item to the receiver - bool try_put( const T& t ) { - graph_task *res = try_put_task(t); - if (!res) return false; - if (res != SUCCESSFULLY_ENQUEUED) spawn_in_graph_arena(graph_reference(), *res); - return true; - } - - //! put item to successor; return task to run the successor if possible. -protected: - //! The input type of this receiver - typedef T input_type; - - //! The predecessor type for this node - typedef sender<T> predecessor_type; - - template< typename R, typename B > friend class run_and_put_task; - template< typename X, typename Y > friend class broadcast_cache; - template< typename X, typename Y > friend class round_robin_cache; - virtual graph_task *try_put_task(const T& t) = 0; - virtual graph& graph_reference() const = 0; - - template<typename TT, typename M> friend class successor_cache; - virtual bool is_continue_receiver() { return false; } - - // TODO revamp: reconsider the inheritance and move node priority out of receiver - virtual node_priority_t priority() const { return no_priority; } - - //! Add a predecessor to the node - virtual bool register_predecessor( predecessor_type & ) { return false; } - - //! Remove a predecessor from the node - virtual bool remove_predecessor( predecessor_type & ) { return false; } - - template <typename C> - friend bool register_predecessor(receiver<C>& r, sender<C>& s); - template <typename C> - friend bool remove_predecessor (receiver<C>& r, sender<C>& s); -}; // class receiver<T> - -template <typename C> -bool register_predecessor(receiver<C>& r, sender<C>& s) { - return r.register_predecessor(s); -} - -template <typename C> -bool remove_predecessor(receiver<C>& r, sender<C>& s) { - return r.remove_predecessor(s); -} - -//! Base class for receivers of completion messages -/** These receivers automatically reset, but cannot be explicitly waited on */ -class continue_receiver : public receiver< continue_msg > { -protected: - - //! 
Constructor - explicit continue_receiver( int number_of_predecessors, node_priority_t a_priority ) { - my_predecessor_count = my_initial_predecessor_count = number_of_predecessors; - my_current_count = 0; - my_priority = a_priority; - } - - //! Copy constructor - continue_receiver( const continue_receiver& src ) : receiver<continue_msg>() { - my_predecessor_count = my_initial_predecessor_count = src.my_initial_predecessor_count; - my_current_count = 0; - my_priority = src.my_priority; - } - - //! Increments the trigger threshold - bool register_predecessor( predecessor_type & ) override { - spin_mutex::scoped_lock l(my_mutex); - ++my_predecessor_count; - return true; - } - - //! Decrements the trigger threshold - /** Does not check to see if the removal of the predecessor now makes the current count - exceed the new threshold. So removing a predecessor while the graph is active can cause - unexpected results. */ - bool remove_predecessor( predecessor_type & ) override { - spin_mutex::scoped_lock l(my_mutex); - --my_predecessor_count; - return true; - } - - //! The input type - typedef continue_msg input_type; - - //! The predecessor type for this node - typedef receiver<input_type>::predecessor_type predecessor_type; - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - // execute body is supposed to be too small to create a task for. - graph_task* try_put_task( const input_type & ) override { - { - spin_mutex::scoped_lock l(my_mutex); - if ( ++my_current_count < my_predecessor_count ) - return SUCCESSFULLY_ENQUEUED; - else - my_current_count = 0; - } - graph_task* res = execute(); - return res? res : SUCCESSFULLY_ENQUEUED; - } - - spin_mutex my_mutex; - int my_predecessor_count; - int my_current_count; - int my_initial_predecessor_count; - node_priority_t my_priority; - // the friend declaration in the base class did not eliminate the "protected class" - // error in gcc 4.1.2 - template<typename U, typename V> friend class limiter_node; - - virtual void reset_receiver( reset_flags f ) { - my_current_count = 0; - if (f & rf_clear_edges) { - my_predecessor_count = my_initial_predecessor_count; - } - } - - //! Does whatever should happen when the threshold is reached - /** This should be very fast or else spawn a task. This is - called while the sender is blocked in the try_put(). 
*/ - virtual graph_task* execute() = 0; - template<typename TT, typename M> friend class successor_cache; - bool is_continue_receiver() override { return true; } - - node_priority_t priority() const override { return my_priority; } -}; // class continue_receiver - -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - template <typename K, typename T> - K key_from_message( const T &t ) { - return t.key(); - } -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - -} // d1 -} // detail -} // tbb - -#include "detail/_flow_graph_trace_impl.h" -#include "detail/_hash_compare.h" - -namespace tbb { -namespace detail { -namespace d1 { - -#include "detail/_flow_graph_body_impl.h" -#include "detail/_flow_graph_cache_impl.h" -#include "detail/_flow_graph_types_impl.h" - -using namespace graph_policy_namespace; - -template <typename C, typename N> -graph_iterator<C,N>::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL) -{ - if (begin) current_node = my_graph->my_nodes; - //else it is an end iterator by default -} - -template <typename C, typename N> -typename graph_iterator<C,N>::reference graph_iterator<C,N>::operator*() const { - __TBB_ASSERT(current_node, "graph_iterator at end"); - return *operator->(); -} - -template <typename C, typename N> -typename graph_iterator<C,N>::pointer graph_iterator<C,N>::operator->() const { - return current_node; -} - -template <typename C, typename N> -void graph_iterator<C,N>::internal_forward() { - if (current_node) current_node = current_node->next; -} - -//! Constructs a graph with isolated task_group_context -inline graph::graph() : my_wait_context(0), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { - prepare_task_arena(); - own_context = true; - cancelled = false; - caught_exception = false; - my_context = new (r1::cache_aligned_allocate(sizeof(task_group_context))) task_group_context(FLOW_TASKS); - fgt_graph(this); - my_is_active = true; -} - -inline graph::graph(task_group_context& use_this_context) : - my_wait_context(0), my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { - prepare_task_arena(); - own_context = false; - cancelled = false; - caught_exception = false; - fgt_graph(this); - my_is_active = true; -} - -inline graph::~graph() { - wait_for_all(); - if (own_context) { - my_context->~task_group_context(); - r1::cache_aligned_deallocate(my_context); - } - delete my_task_arena; -} - -inline void graph::reserve_wait() { - my_wait_context.reserve(); - fgt_reserve_wait(this); -} - -inline void graph::release_wait() { - fgt_release_wait(this); - my_wait_context.release(); -} - -inline void graph::register_node(graph_node *n) { - n->next = NULL; - { - spin_mutex::scoped_lock lock(nodelist_mutex); - n->prev = my_nodes_last; - if (my_nodes_last) my_nodes_last->next = n; - my_nodes_last = n; - if (!my_nodes) my_nodes = n; - } -} - -inline void graph::remove_node(graph_node *n) { - { - spin_mutex::scoped_lock lock(nodelist_mutex); - __TBB_ASSERT(my_nodes && my_nodes_last, "graph::remove_node: Error: no registered nodes"); - if (n->prev) n->prev->next = n->next; - if (n->next) n->next->prev = n->prev; - if (my_nodes_last == n) my_nodes_last = n->prev; - if (my_nodes == n) my_nodes = n->next; - } - n->prev = n->next = NULL; -} - -inline void graph::reset( reset_flags f ) { - // reset context - deactivate_graph(*this); - - my_context->reset(); - cancelled = false; - caught_exception = false; - // reset all the nodes comprising the graph - for(iterator ii = begin(); ii != end(); ++ii) { - graph_node 
*my_p = &(*ii); - my_p->reset_node(f); - } - // Reattach the arena. Might be useful to run the graph in a particular task_arena - // while not limiting graph lifetime to a single task_arena::execute() call. - prepare_task_arena( /*reinit=*/true ); - activate_graph(*this); -} - -inline void graph::cancel() { - my_context->cancel_group_execution(); -} - -inline graph::iterator graph::begin() { return iterator(this, true); } - -inline graph::iterator graph::end() { return iterator(this, false); } - -inline graph::const_iterator graph::begin() const { return const_iterator(this, true); } - -inline graph::const_iterator graph::end() const { return const_iterator(this, false); } - -inline graph::const_iterator graph::cbegin() const { return const_iterator(this, true); } - -inline graph::const_iterator graph::cend() const { return const_iterator(this, false); } - -inline graph_node::graph_node(graph& g) : my_graph(g) { - my_graph.register_node(this); -} - -inline graph_node::~graph_node() { - my_graph.remove_node(this); -} - -#include "detail/_flow_graph_node_impl.h" - - -//! An executable node that acts as a source, i.e. it has no predecessors - -template < typename Output > -class input_node : public graph_node, public sender< Output > { -public: - //! The type of the output message, which is complete - typedef Output output_type; - - //! The type of successors of this node - typedef typename sender<output_type>::successor_type successor_type; - - // Input node has no input type - typedef null_type input_type; - - //! Constructor for a node with a successor - template< typename Body > - __TBB_NOINLINE_SYM input_node( graph &g, Body body ) - : graph_node(g), my_active(false) - , my_body( new input_body_leaf< output_type, Body>(body) ) - , my_init_body( new input_body_leaf< output_type, Body>(body) ) - , my_successors(this), my_reserved(false), my_has_cached_item(false) - { - fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, - static_cast<sender<output_type> *>(this), this->my_body); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... Successors> - input_node( const node_set<order::preceding, Successors...>& successors, Body body ) - : input_node(successors.graph_reference(), body) - { - make_edges(*this, successors); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM input_node( const input_node& src ) - : graph_node(src.my_graph), sender<Output>() - , my_active(false) - , my_body(src.my_init_body->clone()), my_init_body(src.my_init_body->clone()) - , my_successors(this), my_reserved(false), my_has_cached_item(false) - { - fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, - static_cast<sender<output_type> *>(this), this->my_body); - } - - //! The destructor - ~input_node() { delete my_body; delete my_init_body; } - - //! Add a new successor to this node - bool register_successor( successor_type &r ) override { - spin_mutex::scoped_lock lock(my_mutex); - my_successors.register_successor(r); - if ( my_active ) - spawn_put(); - return true; - } - - //! Removes a successor from this node - bool remove_successor( successor_type &r ) override { - spin_mutex::scoped_lock lock(my_mutex); - my_successors.remove_successor(r); - return true; - } - - //! 
Request an item from the node - bool try_get( output_type &v ) override { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) - return false; - - if ( my_has_cached_item ) { - v = my_cached_item; - my_has_cached_item = false; - return true; - } - // we've been asked to provide an item, but we have none. enqueue a task to - // provide one. - if ( my_active ) - spawn_put(); - return false; - } - - //! Reserves an item. - bool try_reserve( output_type &v ) override { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) { - return false; - } - - if ( my_has_cached_item ) { - v = my_cached_item; - my_reserved = true; - return true; - } else { - return false; - } - } - - //! Release a reserved item. - /** true = item has been released and so remains in sender, dest must request or reserve future items */ - bool try_release( ) override { - spin_mutex::scoped_lock lock(my_mutex); - __TBB_ASSERT( my_reserved && my_has_cached_item, "releasing non-existent reservation" ); - my_reserved = false; - if(!my_successors.empty()) - spawn_put(); - return true; - } - - //! Consumes a reserved item - bool try_consume( ) override { - spin_mutex::scoped_lock lock(my_mutex); - __TBB_ASSERT( my_reserved && my_has_cached_item, "consuming non-existent reservation" ); - my_reserved = false; - my_has_cached_item = false; - if ( !my_successors.empty() ) { - spawn_put(); - } - return true; - } - - //! Activates a node that was created in the inactive state - void activate() { - spin_mutex::scoped_lock lock(my_mutex); - my_active = true; - if (!my_successors.empty()) - spawn_put(); - } - - template<typename Body> - Body copy_function_object() { - input_body<output_type> &body_ref = *this->my_body; - return dynamic_cast< input_body_leaf<output_type, Body> & >(body_ref).get_body(); - } - -protected: - - //! resets the input_node to its initial state - void reset_node( reset_flags f) override { - my_active = false; - my_reserved = false; - my_has_cached_item = false; - - if(f & rf_clear_edges) my_successors.clear(); - if(f & rf_reset_bodies) { - input_body<output_type> *tmp = my_init_body->clone(); - delete my_body; - my_body = tmp; - } - } - -private: - spin_mutex my_mutex; - bool my_active; - input_body<output_type> *my_body; - input_body<output_type> *my_init_body; - broadcast_cache< output_type > my_successors; - bool my_reserved; - bool my_has_cached_item; - output_type my_cached_item; - - // used by apply_body_bypass, can invoke body of node. - bool try_reserve_apply_body(output_type &v) { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_reserved ) { - return false; - } - if ( !my_has_cached_item ) { - flow_control control; - - fgt_begin_body( my_body ); - - my_cached_item = (*my_body)(control); - my_has_cached_item = !control.is_pipeline_stopped; - - fgt_end_body( my_body ); - } - if ( my_has_cached_item ) { - v = my_cached_item; - my_reserved = true; - return true; - } else { - return false; - } - } - - graph_task* create_put_task() { - small_object_allocator allocator{}; - typedef input_node_task_bypass< input_node<output_type> > task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - return t; - } - - //! Spawns a task that applies the body - void spawn_put( ) { - if(is_graph_active(this->my_graph)) { - spawn_in_graph_arena(this->my_graph, *create_put_task()); - } - } - - friend class input_node_task_bypass< input_node<output_type> >; - //! Applies the body. 
Returning SUCCESSFULLY_ENQUEUED okay; forward_task_bypass will handle it. - graph_task* apply_body_bypass( ) { - output_type v; - if ( !try_reserve_apply_body(v) ) - return NULL; - - graph_task *last_task = my_successors.try_put_task(v); - if ( last_task ) - try_consume(); - else - try_release(); - return last_task; - } -}; // class input_node - -//! Implements a function node that supports Input -> Output -template<typename Input, typename Output = continue_msg, typename Policy = queueing> -class function_node - : public graph_node - , public function_input< Input, Output, Policy, cache_aligned_allocator<Input> > - , public function_output<Output> -{ - typedef cache_aligned_allocator<Input> internals_allocator; - -public: - typedef Input input_type; - typedef Output output_type; - typedef function_input<input_type,output_type,Policy,internals_allocator> input_impl_type; - typedef function_input_queue<input_type, internals_allocator> input_queue_type; - typedef function_output<output_type> fOutput_type; - typedef typename input_impl_type::predecessor_type predecessor_type; - typedef typename fOutput_type::successor_type successor_type; - - using input_impl_type::my_predecessors; - - //! Constructor - // input_queue_type is allocated here, but destroyed in the function_input_base. - // TODO: pass the graph_buffer_policy to the function_input_base so it can all - // be done in one place. This would be an interface-breaking change. - template< typename Body > - __TBB_NOINLINE_SYM function_node( graph &g, size_t concurrency, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) - : graph_node(g), input_impl_type(g, concurrency, body, a_priority), - fOutput_type(g) { - fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); - } - - template <typename Body> - function_node( graph& g, size_t concurrency, Body body, node_priority_t a_priority ) - : function_node(g, concurrency, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... Args> - function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, - Policy p = Policy(), node_priority_t a_priority = no_priority ) - : function_node(nodes.graph_reference(), concurrency, body, p, a_priority) { - make_edges_in_order(nodes, *this); - } - - template <typename Body, typename... Args> - function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority ) - : function_node(nodes, concurrency, body, Policy(), a_priority) {} -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - - //! Copy constructor - __TBB_NOINLINE_SYM function_node( const function_node& src ) : - graph_node(src.my_graph), - input_impl_type(src), - fOutput_type(src.my_graph) { - fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - using input_impl_type::try_put_task; - - broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } - - void reset_node(reset_flags f) override { - input_impl_type::reset_function_input(f); - // TODO: use clear() instead. 
- if(f & rf_clear_edges) { - successors().clear(); - my_predecessors.clear(); - } - __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "function_node successors not empty"); - __TBB_ASSERT(this->my_predecessors.empty(), "function_node predecessors not empty"); - } - -}; // class function_node - -//! implements a function node that supports Input -> (set of outputs) -// Output is a tuple of output types. -template<typename Input, typename Output, typename Policy = queueing> -class multifunction_node : - public graph_node, - public multifunction_input - < - Input, - typename wrap_tuple_elements< - std::tuple_size<Output>::value, // #elements in tuple - multifunction_output, // wrap this around each element - Output // the tuple providing the types - >::type, - Policy, - cache_aligned_allocator<Input> - > -{ - typedef cache_aligned_allocator<Input> internals_allocator; - -protected: - static const int N = std::tuple_size<Output>::value; -public: - typedef Input input_type; - typedef null_type output_type; - typedef typename wrap_tuple_elements<N,multifunction_output, Output>::type output_ports_type; - typedef multifunction_input< - input_type, output_ports_type, Policy, internals_allocator> input_impl_type; - typedef function_input_queue<input_type, internals_allocator> input_queue_type; -private: - using input_impl_type::my_predecessors; -public: - template<typename Body> - __TBB_NOINLINE_SYM multifunction_node( - graph &g, size_t concurrency, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority - ) : graph_node(g), input_impl_type(g, concurrency, body, a_priority) { - fgt_multioutput_node_with_body<N>( - CODEPTR(), FLOW_MULTIFUNCTION_NODE, - &this->my_graph, static_cast<receiver<input_type> *>(this), - this->output_ports(), this->my_body - ); - } - - template <typename Body> - __TBB_NOINLINE_SYM multifunction_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) - : multifunction_node(g, concurrency, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... Args> - __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, - Policy p = Policy(), node_priority_t a_priority = no_priority) - : multifunction_node(nodes.graph_reference(), concurrency, body, p, a_priority) { - make_edges_in_order(nodes, *this); - } - - template <typename Body, typename... Args> - __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) - : multifunction_node(nodes, concurrency, body, Policy(), a_priority) {} -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - - __TBB_NOINLINE_SYM multifunction_node( const multifunction_node &other) : - graph_node(other.my_graph), input_impl_type(other) { - fgt_multioutput_node_with_body<N>( CODEPTR(), FLOW_MULTIFUNCTION_NODE, - &this->my_graph, static_cast<receiver<input_type> *>(this), - this->output_ports(), this->my_body ); - } - - // all the guts are in multifunction_input... -protected: - void reset_node(reset_flags f) override { input_impl_type::reset(f); } -}; // multifunction_node - -//! split_node: accepts a tuple as input, forwards each element of the tuple to its -// successors. The node has unlimited concurrency, so it does not reject inputs. 
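To make the node types above concrete, here is a small illustrative graph that chains a function_node into a serial sink. The bodies and node names are assumptions, not taken from the diff; make_edge, try_put, and wait_for_all are the standard flow graph wiring calls from this header family.

#include <oneapi/tbb/flow_graph.h>
#include <cstdio>

int main() {
    using namespace tbb::flow;

    graph g;

    // Unlimited-concurrency transform: int -> int.
    function_node<int, int> square(g, unlimited, [](int v) { return v * v; });

    // Serial sink: prints results one at a time.
    function_node<int> printer(g, serial, [](int v) {
        std::printf("%d\n", v);
        return continue_msg{};
    });

    make_edge(square, printer);     // square's output feeds printer

    for (int i = 0; i < 5; ++i)
        square.try_put(i);          // inject work at the head of the graph

    g.wait_for_all();               // block until all spawned node bodies finish
    return 0;
}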
-template<typename TupleType> -class split_node : public graph_node, public receiver<TupleType> { - static const int N = std::tuple_size<TupleType>::value; - typedef receiver<TupleType> base_type; -public: - typedef TupleType input_type; - typedef typename wrap_tuple_elements< - N, // #elements in tuple - multifunction_output, // wrap this around each element - TupleType // the tuple providing the types - >::type output_ports_type; - - __TBB_NOINLINE_SYM explicit split_node(graph &g) - : graph_node(g), - my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)) - { - fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), this->output_ports()); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - __TBB_NOINLINE_SYM split_node(const node_set<Args...>& nodes) : split_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - __TBB_NOINLINE_SYM split_node(const split_node& other) - : graph_node(other.my_graph), base_type(other), - my_output_ports(init_output_ports<output_ports_type>::call(other.my_graph, my_output_ports)) - { - fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), this->output_ports()); - } - - output_ports_type &output_ports() { return my_output_ports; } - -protected: - graph_task *try_put_task(const TupleType& t) override { - // Sending split messages in parallel is not justified, as overheads would prevail. - // Also, we do not have successors here. So we just tell the task returned here is successful. - return emit_element<N>::emit_this(this->my_graph, t, output_ports()); - } - void reset_node(reset_flags f) override { - if (f & rf_clear_edges) - clear_element<N>::clear_this(my_output_ports); - - __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "split_node reset failed"); - } - graph& graph_reference() const override { - return my_graph; - } - -private: - output_ports_type my_output_ports; -}; - -//! Implements an executable node that supports continue_msg -> Output -template <typename Output, typename Policy = Policy<void> > -class continue_node : public graph_node, public continue_input<Output, Policy>, - public function_output<Output> { -public: - typedef continue_msg input_type; - typedef Output output_type; - typedef continue_input<Output, Policy> input_impl_type; - typedef function_output<output_type> fOutput_type; - typedef typename input_impl_type::predecessor_type predecessor_type; - typedef typename fOutput_type::successor_type successor_type; - - //! Constructor for executable node with continue_msg -> Output - template <typename Body > - __TBB_NOINLINE_SYM continue_node( - graph &g, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority - ) : graph_node(g), input_impl_type( g, body, a_priority ), - fOutput_type(g) { - fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, - - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this), this->my_body ); - } - - template <typename Body> - continue_node( graph& g, Body body, node_priority_t a_priority ) - : continue_node(g, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... 
Args> - continue_node( const node_set<Args...>& nodes, Body body, - Policy p = Policy(), node_priority_t a_priority = no_priority ) - : continue_node(nodes.graph_reference(), body, p, a_priority ) { - make_edges_in_order(nodes, *this); - } - template <typename Body, typename... Args> - continue_node( const node_set<Args...>& nodes, Body body, node_priority_t a_priority) - : continue_node(nodes, body, Policy(), a_priority) {} -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - - //! Constructor for executable node with continue_msg -> Output - template <typename Body > - __TBB_NOINLINE_SYM continue_node( - graph &g, int number_of_predecessors, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority - ) : graph_node(g) - , input_impl_type(g, number_of_predecessors, body, a_priority), - fOutput_type(g) { - fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this), this->my_body ); - } - - template <typename Body> - continue_node( graph& g, int number_of_predecessors, Body body, node_priority_t a_priority) - : continue_node(g, number_of_predecessors, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... Args> - continue_node( const node_set<Args...>& nodes, int number_of_predecessors, - Body body, Policy p = Policy(), node_priority_t a_priority = no_priority ) - : continue_node(nodes.graph_reference(), number_of_predecessors, body, p, a_priority) { - make_edges_in_order(nodes, *this); - } - - template <typename Body, typename... Args> - continue_node( const node_set<Args...>& nodes, int number_of_predecessors, - Body body, node_priority_t a_priority ) - : continue_node(nodes, number_of_predecessors, body, Policy(), a_priority) {} -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM continue_node( const continue_node& src ) : - graph_node(src.my_graph), input_impl_type(src), - function_output<Output>(src.my_graph) { - fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this), this->my_body ); - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - using input_impl_type::try_put_task; - broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } - - void reset_node(reset_flags f) override { - input_impl_type::reset_receiver(f); - if(f & rf_clear_edges)successors().clear(); - __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "continue_node not reset"); - } -}; // continue_node - -//! Forwards messages of type T to all successors -template <typename T> -class broadcast_node : public graph_node, public receiver<T>, public sender<T> { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; -private: - broadcast_cache<input_type> my_successors; -public: - - __TBB_NOINLINE_SYM explicit broadcast_node(graph& g) : graph_node(g), my_successors(this) { - fgt_node( CODEPTR(), FLOW_BROADCAST_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - broadcast_node(const node_set<Args...>& nodes) : broadcast_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM broadcast_node( const broadcast_node& src ) : broadcast_node(src.my_graph) {} - - //! Adds a successor - bool register_successor( successor_type &r ) override { - my_successors.register_successor( r ); - return true; - } - - //! Removes s as a successor - bool remove_successor( successor_type &r ) override { - my_successors.remove_successor( r ); - return true; - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - //! build a task to run the successor if possible. Default is old behavior. - graph_task *try_put_task(const T& t) override { - graph_task *new_task = my_successors.try_put_task(t); - if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; - return new_task; - } - - graph& graph_reference() const override { - return my_graph; - } - - void reset_node(reset_flags f) override { - if (f&rf_clear_edges) { - my_successors.clear(); - } - __TBB_ASSERT(!(f & rf_clear_edges) || my_successors.empty(), "Error resetting broadcast_node"); - } -}; // broadcast_node - -//! Forwards messages in arbitrary order -template <typename T> -class buffer_node - : public graph_node - , public reservable_item_buffer< T, cache_aligned_allocator<T> > - , public receiver<T>, public sender<T> -{ - typedef cache_aligned_allocator<T> internals_allocator; - -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - typedef buffer_node<T> class_type; - -protected: - typedef size_t size_type; - round_robin_cache< T, null_rw_mutex > my_successors; - - friend class forward_task_bypass< class_type >; - - enum op_type {reg_succ, rem_succ, req_item, res_item, rel_res, con_res, put_item, try_fwd_task - }; - - // implements the aggregator_operation concept - class buffer_operation : public aggregated_operation< buffer_operation > { - public: - char type; - T* elem; - graph_task* ltask; - successor_type *r; - - buffer_operation(const T& e, op_type t) : type(char(t)) - , elem(const_cast<T*>(&e)) , ltask(NULL) - {} - buffer_operation(op_type t) : type(char(t)), ltask(NULL) {} - }; - - bool forwarder_busy; - typedef aggregating_functor<class_type, buffer_operation> handler_type; - friend class aggregating_functor<class_type, buffer_operation>; - aggregator< handler_type, buffer_operation> my_aggregator; - - virtual void handle_operations(buffer_operation *op_list) { - handle_operations_impl(op_list, this); - } - - template<typename derived_type> - void handle_operations_impl(buffer_operation *op_list, derived_type* derived) { - __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); - - buffer_operation *tmp = NULL; - bool try_forwarding = false; - while (op_list) { - tmp = op_list; - op_list = op_list->next; - switch (tmp->type) { - case reg_succ: internal_reg_succ(tmp); try_forwarding = true; break; - case rem_succ: internal_rem_succ(tmp); break; - case req_item: internal_pop(tmp); break; - case res_item: internal_reserve(tmp); break; - case rel_res: internal_release(tmp); try_forwarding = true; break; - case con_res: internal_consume(tmp); try_forwarding = true; break; - case put_item: 
try_forwarding = internal_push(tmp); break; - case try_fwd_task: internal_forward_task(tmp); break; - } - } - - derived->order(); - - if (try_forwarding && !forwarder_busy) { - if(is_graph_active(this->my_graph)) { - forwarder_busy = true; - typedef forward_task_bypass<class_type> task_type; - small_object_allocator allocator{}; - graph_task* new_task = allocator.new_object<task_type>(graph_reference(), allocator, *this); - my_graph.reserve_wait(); - // tmp should point to the last item handled by the aggregator. This is the operation - // the handling thread enqueued. So modifying that record will be okay. - // TODO revamp: check that the issue is still present - // workaround for icc bug (at least 12.0 and 13.0) - // error: function "tbb::flow::interfaceX::combine_tasks" cannot be called with the given argument list - // argument types are: (graph, graph_task *, graph_task *) - graph_task *z = tmp->ltask; - graph &g = this->my_graph; - tmp->ltask = combine_tasks(g, z, new_task); // in case the op generated a task - } - } - } // handle_operations - - inline graph_task *grab_forwarding_task( buffer_operation &op_data) { - return op_data.ltask; - } - - inline bool enqueue_forwarding_task(buffer_operation &op_data) { - graph_task *ft = grab_forwarding_task(op_data); - if(ft) { - spawn_in_graph_arena(graph_reference(), *ft); - return true; - } - return false; - } - - //! This is executed by an enqueued task, the "forwarder" - virtual graph_task *forward_task() { - buffer_operation op_data(try_fwd_task); - graph_task *last_task = NULL; - do { - op_data.status = WAIT; - op_data.ltask = NULL; - my_aggregator.execute(&op_data); - - // workaround for icc bug - graph_task *xtask = op_data.ltask; - graph& g = this->my_graph; - last_task = combine_tasks(g, last_task, xtask); - } while (op_data.status ==SUCCEEDED); - return last_task; - } - - //! Register successor - virtual void internal_reg_succ(buffer_operation *op) { - my_successors.register_successor(*(op->r)); - op->status.store(SUCCEEDED, std::memory_order_release); - } - - //! Remove successor - virtual void internal_rem_succ(buffer_operation *op) { - my_successors.remove_successor(*(op->r)); - op->status.store(SUCCEEDED, std::memory_order_release); - } - -private: - void order() {} - - bool is_item_valid() { - return this->my_item_valid(this->my_tail - 1); - } - - void try_put_and_add_task(graph_task*& last_task) { - graph_task *new_task = my_successors.try_put_task(this->back()); - if (new_task) { - // workaround for icc bug - graph& g = this->my_graph; - last_task = combine_tasks(g, last_task, new_task); - this->destroy_back(); - } - } - -protected: - //! 
Tries to forward valid items to successors - virtual void internal_forward_task(buffer_operation *op) { - internal_forward_task_impl(op, this); - } - - template<typename derived_type> - void internal_forward_task_impl(buffer_operation *op, derived_type* derived) { - __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); - - if (this->my_reserved || !derived->is_item_valid()) { - op->status.store(FAILED, std::memory_order_release); - this->forwarder_busy = false; - return; - } - // Try forwarding, giving each successor a chance - graph_task* last_task = NULL; - size_type counter = my_successors.size(); - for (; counter > 0 && derived->is_item_valid(); --counter) - derived->try_put_and_add_task(last_task); - - op->ltask = last_task; // return task - if (last_task && !counter) { - op->status.store(SUCCEEDED, std::memory_order_release); - } - else { - op->status.store(FAILED, std::memory_order_release); - forwarder_busy = false; - } - } - - virtual bool internal_push(buffer_operation *op) { - this->push_back(*(op->elem)); - op->status.store(SUCCEEDED, std::memory_order_release); - return true; - } - - virtual void internal_pop(buffer_operation *op) { - if(this->pop_back(*(op->elem))) { - op->status.store(SUCCEEDED, std::memory_order_release); - } - else { - op->status.store(FAILED, std::memory_order_release); - } - } - - virtual void internal_reserve(buffer_operation *op) { - if(this->reserve_front(*(op->elem))) { - op->status.store(SUCCEEDED, std::memory_order_release); - } - else { - op->status.store(FAILED, std::memory_order_release); - } - } - - virtual void internal_consume(buffer_operation *op) { - this->consume_front(); - op->status.store(SUCCEEDED, std::memory_order_release); - } - - virtual void internal_release(buffer_operation *op) { - this->release_front(); - op->status.store(SUCCEEDED, std::memory_order_release); - } - -public: - //! Constructor - __TBB_NOINLINE_SYM explicit buffer_node( graph &g ) - : graph_node(g), reservable_item_buffer<T, internals_allocator>(), receiver<T>(), - sender<T>(), my_successors(this), forwarder_busy(false) - { - my_aggregator.initialize_handler(handler_type(this)); - fgt_node( CODEPTR(), FLOW_BUFFER_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - buffer_node(const node_set<Args...>& nodes) : buffer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM buffer_node( const buffer_node& src ) : buffer_node(src.my_graph) {} - - // - // message sender implementation - // - - //! Adds a new successor. - /** Adds successor r to the list of successors; may forward tasks. */ - bool register_successor( successor_type &r ) override { - buffer_operation op_data(reg_succ); - op_data.r = &r; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - - //! Removes a successor. - /** Removes successor r from the list of successors. - It also calls r.remove_predecessor(*this) to remove this node as a predecessor. 
*/ - bool remove_successor( successor_type &r ) override { - // TODO revamp: investigate why full qualification is necessary here - tbb::detail::d1::remove_predecessor(r, *this); - buffer_operation op_data(rem_succ); - op_data.r = &r; - my_aggregator.execute(&op_data); - // even though this operation does not cause a forward, if we are the handler, and - // a forward is scheduled, we may be the first to reach this point after the aggregator, - // and so should check for the task. - (void)enqueue_forwarding_task(op_data); - return true; - } - - //! Request an item from the buffer_node - /** true = v contains the returned item<BR> - false = no item has been returned */ - bool try_get( T &v ) override { - buffer_operation op_data(req_item); - op_data.elem = &v; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return (op_data.status==SUCCEEDED); - } - - //! Reserves an item. - /** false = no item can be reserved<BR> - true = an item is reserved */ - bool try_reserve( T &v ) override { - buffer_operation op_data(res_item); - op_data.elem = &v; - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return (op_data.status==SUCCEEDED); - } - - //! Release a reserved item. - /** true = item has been released and so remains in sender */ - bool try_release() override { - buffer_operation op_data(rel_res); - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - - //! Consumes a reserved item. - /** true = item is removed from sender and reservation removed */ - bool try_consume() override { - buffer_operation op_data(con_res); - my_aggregator.execute(&op_data); - (void)enqueue_forwarding_task(op_data); - return true; - } - -protected: - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - //! receive an item, return a task *if possible - graph_task *try_put_task(const T &t) override { - buffer_operation op_data(t, put_item); - my_aggregator.execute(&op_data); - graph_task *ft = grab_forwarding_task(op_data); - // sequencer_nodes can return failure (if an item has been previously inserted) - // We have to spawn the returned task if our own operation fails. - - if(ft && op_data.status ==FAILED) { - // we haven't succeeded queueing the item, but for some reason the - // call returned a task (if another request resulted in a successful - // forward this could happen.) Queue the task and reset the pointer. - spawn_in_graph_arena(graph_reference(), *ft); ft = NULL; - } - else if(!ft && op_data.status ==SUCCEEDED) { - ft = SUCCESSFULLY_ENQUEUED; - } - return ft; - } - - graph& graph_reference() const override { - return my_graph; - } - -protected: - void reset_node( reset_flags f) override { - reservable_item_buffer<T, internals_allocator>::reset(); - // TODO: just clear structures - if (f&rf_clear_edges) { - my_successors.clear(); - } - forwarder_busy = false; - } -}; // buffer_node - -//! 
Forwards messages in FIFO order -template <typename T> -class queue_node : public buffer_node<T> { -protected: - typedef buffer_node<T> base_type; - typedef typename base_type::size_type size_type; - typedef typename base_type::buffer_operation queue_operation; - typedef queue_node class_type; - -private: - template<typename> friend class buffer_node; - - bool is_item_valid() { - return this->my_item_valid(this->my_head); - } - - void try_put_and_add_task(graph_task*& last_task) { - graph_task *new_task = this->my_successors.try_put_task(this->front()); - if (new_task) { - // workaround for icc bug - graph& graph_ref = this->graph_reference(); - last_task = combine_tasks(graph_ref, last_task, new_task); - this->destroy_front(); - } - } - -protected: - void internal_forward_task(queue_operation *op) override { - this->internal_forward_task_impl(op, this); - } - - void internal_pop(queue_operation *op) override { - if ( this->my_reserved || !this->my_item_valid(this->my_head)){ - op->status.store(FAILED, std::memory_order_release); - } - else { - this->pop_front(*(op->elem)); - op->status.store(SUCCEEDED, std::memory_order_release); - } - } - void internal_reserve(queue_operation *op) override { - if (this->my_reserved || !this->my_item_valid(this->my_head)) { - op->status.store(FAILED, std::memory_order_release); - } - else { - this->reserve_front(*(op->elem)); - op->status.store(SUCCEEDED, std::memory_order_release); - } - } - void internal_consume(queue_operation *op) override { - this->consume_front(); - op->status.store(SUCCEEDED, std::memory_order_release); - } - -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - //! Constructor - __TBB_NOINLINE_SYM explicit queue_node( graph &g ) : base_type(g) { - fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - queue_node( const node_set<Args...>& nodes) : queue_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM queue_node( const queue_node& src) : base_type(src) { - fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - - -protected: - void reset_node( reset_flags f) override { - base_type::reset_node(f); - } -}; // queue_node - -//! Forwards messages in sequence order -template <typename T> -class sequencer_node : public queue_node<T> { - function_body< T, size_t > *my_sequencer; - // my_sequencer should be a benign function and must be callable - // from a parallel context. Does this mean it needn't be reset? -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - //! 
Constructor - template< typename Sequencer > - __TBB_NOINLINE_SYM sequencer_node( graph &g, const Sequencer& s ) : queue_node<T>(g), - my_sequencer(new function_body_leaf< T, size_t, Sequencer>(s) ) { - fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Sequencer, typename... Args> - sequencer_node( const node_set<Args...>& nodes, const Sequencer& s) - : sequencer_node(nodes.graph_reference(), s) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM sequencer_node( const sequencer_node& src ) : queue_node<T>(src), - my_sequencer( src.my_sequencer->clone() ) { - fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - - //! Destructor - ~sequencer_node() { delete my_sequencer; } - -protected: - typedef typename buffer_node<T>::size_type size_type; - typedef typename buffer_node<T>::buffer_operation sequencer_operation; - -private: - bool internal_push(sequencer_operation *op) override { - size_type tag = (*my_sequencer)(*(op->elem)); -#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES - if (tag < this->my_head) { - // have already emitted a message with this tag - op->status.store(FAILED, std::memory_order_release); - return false; - } -#endif - // cannot modify this->my_tail now; the buffer would be inconsistent. - size_t new_tail = (tag+1 > this->my_tail) ? tag+1 : this->my_tail; - - if (this->size(new_tail) > this->capacity()) { - this->grow_my_array(this->size(new_tail)); - } - this->my_tail = new_tail; - - const op_stat res = this->place_item(tag, *(op->elem)) ? SUCCEEDED : FAILED; - op->status.store(res, std::memory_order_release); - return res ==SUCCEEDED; - } -}; // sequencer_node - -//! Forwards messages in priority order -template<typename T, typename Compare = std::less<T>> -class priority_queue_node : public buffer_node<T> { -public: - typedef T input_type; - typedef T output_type; - typedef buffer_node<T> base_type; - typedef priority_queue_node class_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - //! Constructor - __TBB_NOINLINE_SYM explicit priority_queue_node( graph &g, const Compare& comp = Compare() ) - : buffer_node<T>(g), compare(comp), mark(0) { - fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - priority_queue_node(const node_set<Args...>& nodes, const Compare& comp = Compare()) - : priority_queue_node(nodes.graph_reference(), comp) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - __TBB_NOINLINE_SYM priority_queue_node( const priority_queue_node &src ) - : buffer_node<T>(src), mark(0) - { - fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -protected: - - void reset_node( reset_flags f) override { - mark = 0; - base_type::reset_node(f); - } - - typedef typename buffer_node<T>::size_type size_type; - typedef typename buffer_node<T>::item_type item_type; - typedef typename buffer_node<T>::buffer_operation prio_operation; - - //! 
Tries to forward valid items to successors - void internal_forward_task(prio_operation *op) override { - this->internal_forward_task_impl(op, this); - } - - void handle_operations(prio_operation *op_list) override { - this->handle_operations_impl(op_list, this); - } - - bool internal_push(prio_operation *op) override { - prio_push(*(op->elem)); - op->status.store(SUCCEEDED, std::memory_order_release); - return true; - } - - void internal_pop(prio_operation *op) override { - // if empty or already reserved, don't pop - if ( this->my_reserved == true || this->my_tail == 0 ) { - op->status.store(FAILED, std::memory_order_release); - return; - } - - *(op->elem) = prio(); - op->status.store(SUCCEEDED, std::memory_order_release); - prio_pop(); - - } - - // pops the highest-priority item, saves copy - void internal_reserve(prio_operation *op) override { - if (this->my_reserved == true || this->my_tail == 0) { - op->status.store(FAILED, std::memory_order_release); - return; - } - this->my_reserved = true; - *(op->elem) = prio(); - reserved_item = *(op->elem); - op->status.store(SUCCEEDED, std::memory_order_release); - prio_pop(); - } - - void internal_consume(prio_operation *op) override { - op->status.store(SUCCEEDED, std::memory_order_release); - this->my_reserved = false; - reserved_item = input_type(); - } - - void internal_release(prio_operation *op) override { - op->status.store(SUCCEEDED, std::memory_order_release); - prio_push(reserved_item); - this->my_reserved = false; - reserved_item = input_type(); - } - -private: - template<typename> friend class buffer_node; - - void order() { - if (mark < this->my_tail) heapify(); - __TBB_ASSERT(mark == this->my_tail, "mark unequal after heapify"); - } - - bool is_item_valid() { - return this->my_tail > 0; - } - - void try_put_and_add_task(graph_task*& last_task) { - graph_task * new_task = this->my_successors.try_put_task(this->prio()); - if (new_task) { - // workaround for icc bug - graph& graph_ref = this->graph_reference(); - last_task = combine_tasks(graph_ref, last_task, new_task); - prio_pop(); - } - } - -private: - Compare compare; - size_type mark; - - input_type reserved_item; - - // in case a reheap has not been done after a push, check if the mark item is higher than the 0'th item - bool prio_use_tail() { - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds before test"); - return mark < this->my_tail && compare(this->get_my_item(0), this->get_my_item(this->my_tail - 1)); - } - - // prio_push: checks that the item will fit, expand array if necessary, put at end - void prio_push(const T &src) { - if ( this->my_tail >= this->my_array_size ) - this->grow_my_array( this->my_tail + 1 ); - (void) this->place_item(this->my_tail, src); - ++(this->my_tail); - __TBB_ASSERT(mark < this->my_tail, "mark outside bounds after push"); - } - - // prio_pop: deletes highest priority item from the array, and if it is item - // 0, move last item to 0 and reheap. If end of array, just destroy and decrement tail - // and mark. Assumes the array has already been tested for emptiness; no failure. 
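// The prio_pop/reheap pair below is a conventional binary-heap extraction over the
// node's flat item array. For reference, a minimal self-contained sketch of the same
// sift-down idea on a std::vector follows; heap_pop and its parameters are
// illustrative names only, not members of priority_queue_node.

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

template <typename T, typename Compare = std::less<T>>
T heap_pop(std::vector<T>& heap, Compare compare = Compare()) {
    // Precondition: heap is non-empty and ordered as a max-heap w.r.t. compare.
    T top = std::move(heap.front());
    if (heap.size() > 1)
        heap.front() = std::move(heap.back());    // move the last element to the root
    heap.pop_back();
    std::size_t cur = 0;
    while (true) {
        std::size_t child = 2 * cur + 1;
        if (child >= heap.size()) break;
        if (child + 1 < heap.size() && compare(heap[child], heap[child + 1]))
            ++child;                              // pick the higher-priority child
        if (!compare(heap[cur], heap[child]))
            break;                                // heap property is restored
        std::swap(heap[cur], heap[child]);
        cur = child;
    }
    return top;
}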
- void prio_pop() { - if (prio_use_tail()) { - // there are newly pushed elements; last one higher than top - // copy the data - this->destroy_item(this->my_tail-1); - --(this->my_tail); - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); - return; - } - this->destroy_item(0); - if(this->my_tail > 1) { - // push the last element down heap - __TBB_ASSERT(this->my_item_valid(this->my_tail - 1), NULL); - this->move_item(0,this->my_tail - 1); - } - --(this->my_tail); - if(mark > this->my_tail) --mark; - if (this->my_tail > 1) // don't reheap for heap of size 1 - reheap(); - __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); - } - - const T& prio() { - return this->get_my_item(prio_use_tail() ? this->my_tail-1 : 0); - } - - // turn array into heap - void heapify() { - if(this->my_tail == 0) { - mark = 0; - return; - } - if (!mark) mark = 1; - for (; mark<this->my_tail; ++mark) { // for each unheaped element - size_type cur_pos = mark; - input_type to_place; - this->fetch_item(mark,to_place); - do { // push to_place up the heap - size_type parent = (cur_pos-1)>>1; - if (!compare(this->get_my_item(parent), to_place)) - break; - this->move_item(cur_pos, parent); - cur_pos = parent; - } while( cur_pos ); - (void) this->place_item(cur_pos, to_place); - } - } - - // otherwise heapified array with new root element; rearrange to heap - void reheap() { - size_type cur_pos=0, child=1; - while (child < mark) { - size_type target = child; - if (child+1<mark && - compare(this->get_my_item(child), - this->get_my_item(child+1))) - ++target; - // target now has the higher priority child - if (compare(this->get_my_item(target), - this->get_my_item(cur_pos))) - break; - // swap - this->swap_items(cur_pos, target); - cur_pos = target; - child = (cur_pos<<1)+1; - } - } -}; // priority_queue_node - -//! Forwards messages only if the threshold has not been reached -/** This node forwards items until its threshold is reached. - It contains no buffering. If the downstream node rejects, the - message is dropped. */ -template< typename T, typename DecrementType=continue_msg > -class limiter_node : public graph_node, public receiver< T >, public sender< T > { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - //TODO: There is a lack of predefined types for its controlling "decrementer" port. It should be fixed later. - -private: - size_t my_threshold; - size_t my_count; // number of successful puts - size_t my_tries; // number of active put attempts - reservable_predecessor_cache< T, spin_mutex > my_predecessors; - spin_mutex my_mutex; - broadcast_cache< T > my_successors; - - //! 
The internal receiver< DecrementType > that adjusts the count - threshold_regulator< limiter_node<T, DecrementType>, DecrementType > decrement; - - graph_task* decrement_counter( long long delta ) { - { - spin_mutex::scoped_lock lock(my_mutex); - if( delta > 0 && size_t(delta) > my_count ) - my_count = 0; - else if( delta < 0 && size_t(delta) > my_threshold - my_count ) - my_count = my_threshold; - else - my_count -= size_t(delta); // absolute value of delta is sufficiently small - } - return forward_task(); - } - - // Let threshold_regulator call decrement_counter() - friend class threshold_regulator< limiter_node<T, DecrementType>, DecrementType >; - - friend class forward_task_bypass< limiter_node<T,DecrementType> >; - - bool check_conditions() { // always called under lock - return ( my_count + my_tries < my_threshold && !my_predecessors.empty() && !my_successors.empty() ); - } - - // only returns a valid task pointer or NULL, never SUCCESSFULLY_ENQUEUED - graph_task* forward_task() { - input_type v; - graph_task* rval = NULL; - bool reserved = false; - { - spin_mutex::scoped_lock lock(my_mutex); - if ( check_conditions() ) - ++my_tries; - else - return NULL; - } - - //SUCCESS - // if we can reserve and can put, we consume the reservation - // we increment the count and decrement the tries - if ( (my_predecessors.try_reserve(v)) == true ){ - reserved=true; - if ( (rval = my_successors.try_put_task(v)) != NULL ){ - { - spin_mutex::scoped_lock lock(my_mutex); - ++my_count; - --my_tries; - my_predecessors.try_consume(); - if ( check_conditions() ) { - if ( is_graph_active(this->my_graph) ) { - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - small_object_allocator allocator{}; - graph_task* rtask = allocator.new_object<task_type>( my_graph, allocator, *this ); - my_graph.reserve_wait(); - spawn_in_graph_arena(graph_reference(), *rtask); - } - } - } - return rval; - } - } - //FAILURE - //if we can't reserve, we decrement the tries - //if we can reserve but can't put, we decrement the tries and release the reservation - { - spin_mutex::scoped_lock lock(my_mutex); - --my_tries; - if (reserved) my_predecessors.try_release(); - if ( check_conditions() ) { - if ( is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - __TBB_ASSERT(!rval, "Have two tasks to handle"); - return t; - } - } - return rval; - } - } - - void initialize() { - fgt_node( - CODEPTR(), FLOW_LIMITER_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<receiver<DecrementType> *>(&decrement), - static_cast<sender<output_type> *>(this) - ); - } - -public: - //! Constructor - limiter_node(graph &g, size_t threshold) - : graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), my_predecessors(this) - , my_successors(this), decrement(this) - { - initialize(); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - limiter_node(const node_set<Args...>& nodes, size_t threshold) - : limiter_node(nodes.graph_reference(), threshold) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor - limiter_node( const limiter_node& src ) : limiter_node(src.my_graph, src.my_threshold) {} - - //! The interface for accessing internal receiver< DecrementType > that adjusts the count - receiver<DecrementType>& decrementer() { return decrement; } - - //! 
Replace the current successor with this new successor - bool register_successor( successor_type &r ) override { - spin_mutex::scoped_lock lock(my_mutex); - bool was_empty = my_successors.empty(); - my_successors.register_successor(r); - //spawn a forward task if this is the only successor - if ( was_empty && !my_predecessors.empty() && my_count + my_tries < my_threshold ) { - if ( is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - spawn_in_graph_arena(graph_reference(), *t); - } - } - return true; - } - - //! Removes a successor from this node - /** r.remove_predecessor(*this) is also called. */ - bool remove_successor( successor_type &r ) override { - // TODO revamp: investigate why qualification is needed for remove_predecessor() call - tbb::detail::d1::remove_predecessor(r, *this); - my_successors.remove_successor(r); - return true; - } - - //! Adds src to the list of cached predecessors. - bool register_predecessor( predecessor_type &src ) override { - spin_mutex::scoped_lock lock(my_mutex); - my_predecessors.add( src ); - if ( my_count + my_tries < my_threshold && !my_successors.empty() && is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - spawn_in_graph_arena(graph_reference(), *t); - } - return true; - } - - //! Removes src from the list of cached predecessors. - bool remove_predecessor( predecessor_type &src ) override { - my_predecessors.remove( src ); - return true; - } - -protected: - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - //! Puts an item to this receiver - graph_task* try_put_task( const T &t ) override { - { - spin_mutex::scoped_lock lock(my_mutex); - if ( my_count + my_tries >= my_threshold ) - return NULL; - else - ++my_tries; - } - - graph_task* rtask = my_successors.try_put_task(t); - - if ( !rtask ) { // try_put_task failed. 
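                // The successor rejected the message: under the lock the speculative
                // ++my_tries from above is rolled back, and if the node can still
                // forward (count plus in-flight tries below the threshold, predecessor
                // and successor caches non-empty) and the graph is active, a
                // forward_task_bypass task is created and returned to the caller so
                // the attempt is repeated later.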
- spin_mutex::scoped_lock lock(my_mutex); - --my_tries; - if (check_conditions() && is_graph_active(this->my_graph)) { - small_object_allocator allocator{}; - typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - rtask = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); - } - } - else { - spin_mutex::scoped_lock lock(my_mutex); - ++my_count; - --my_tries; - } - return rtask; - } - - graph& graph_reference() const override { return my_graph; } - - void reset_node( reset_flags f) override { - my_count = 0; - if(f & rf_clear_edges) { - my_predecessors.clear(); - my_successors.clear(); - } - else - { - my_predecessors.reset( ); - } - decrement.reset_receiver(f); - } -}; // limiter_node - -#include "detail/_flow_graph_join_impl.h" - -template<typename OutputTuple, typename JP=queueing> class join_node; - -template<typename OutputTuple> -class join_node<OutputTuple,reserving>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, reserving_port, OutputTuple, reserving> { -private: - static const int N = std::tuple_size<OutputTuple>::value; - typedef unfolded_join_node<N, reserving_port, OutputTuple, reserving> unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, reserving = reserving()) : join_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -template<typename OutputTuple> -class join_node<OutputTuple,queueing>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, queueing_port, OutputTuple, queueing> { -private: - static const int N = std::tuple_size<OutputTuple>::value; - typedef unfolded_join_node<N, queueing_port, OutputTuple, queueing> unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, queueing = queueing()) : join_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -// template for key_matching join_node -// tag_matching join_node is a specialization of key_matching, and is source-compatible. 
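// For orientation, a minimal usage sketch of the queueing join_node defined above;
// the reserving form differs only in the policy tag, and the key_matching form
// declared next additionally takes one key-extractor body per input port. The
// function and variable names below are illustrative, not part of the library.

#include "oneapi/tbb/flow_graph.h"
#include <iostream>
#include <tuple>

void join_usage_sketch() {
    using namespace tbb::flow;
    graph g;
    function_node<int, int>   doubler(g, unlimited, [](int v) { return 2 * v; });
    function_node<int, float> halver (g, unlimited, [](int v) { return v / 2.0f; });
    join_node<std::tuple<int, float>, queueing> pair_up(g);   // waits for one item per port
    function_node<std::tuple<int, float>> printer(g, serial,
        [](const std::tuple<int, float>& t) {
            std::cout << std::get<0>(t) << " / " << std::get<1>(t) << std::endl;
            return continue_msg{};
        });
    make_edge(doubler, input_port<0>(pair_up));
    make_edge(halver,  input_port<1>(pair_up));
    make_edge(pair_up, printer);
    doubler.try_put(21);
    halver.try_put(21);
    g.wait_for_all();   // prints "42 / 10.5"
}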
-template<typename OutputTuple, typename K, typename KHash> -class join_node<OutputTuple, key_matching<K, KHash> > : public unfolded_join_node<std::tuple_size<OutputTuple>::value, - key_matching_port, OutputTuple, key_matching<K,KHash> > { -private: - static const int N = std::tuple_size<OutputTuple>::value; - typedef unfolded_join_node<N, key_matching_port, OutputTuple, key_matching<K,KHash> > unfolded_type; -public: - typedef OutputTuple output_type; - typedef typename unfolded_type::input_ports_type input_ports_type; - -#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING - join_node(graph &g) : unfolded_type(g) {} -#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ - - template<typename __TBB_B0, typename __TBB_B1> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1) : unfolded_type(g, b0, b1) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2) : unfolded_type(g, b0, b1, b2) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3) : unfolded_type(g, b0, b1, b2, b3) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4) : - unfolded_type(g, b0, b1, b2, b3, b4) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#if __TBB_VARIADIC_MAX >= 6 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5) : - unfolded_type(g, b0, b1, b2, b3, b4, b5) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 7 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5, typename __TBB_B6> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6) : - unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 8 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5, typename __TBB_B6, typename __TBB_B7> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { - fgt_multiinput_node<N>( CODEPTR(), 
FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 9 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7, __TBB_B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif -#if __TBB_VARIADIC_MAX >= 10 - template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, - typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8, typename __TBB_B9> - __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, - __TBB_B7 b7, __TBB_B8 b8, __TBB_B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -#endif - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template < -#if (__clang_major__ == 3 && __clang_minor__ == 4) - // clang 3.4 misdeduces 'Args...' for 'node_set' while it can cope with template template parameter. - template<typename...> class node_set, -#endif - typename... Args, typename... Bodies - > - __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, Bodies... bodies) - : join_node(nodes.graph_reference(), bodies...) { - make_edges_in_order(nodes, *this); - } -#endif - - __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -// indexer node -#include "detail/_flow_graph_indexer_impl.h" - -// TODO: Implement interface with variadic template or tuple -template<typename T0, typename T1=null_type, typename T2=null_type, typename T3=null_type, - typename T4=null_type, typename T5=null_type, typename T6=null_type, - typename T7=null_type, typename T8=null_type, typename T9=null_type> class indexer_node; - -//indexer node specializations -template<typename T0> -class indexer_node<T0> : public unfolded_indexer_node<std::tuple<T0> > { -private: - static const int N = 1; -public: - typedef std::tuple<T0> InputTuple; - typedef tagged_msg<size_t, T0> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } -}; - -template<typename T0, typename T1> -class indexer_node<T0, T1> : public unfolded_indexer_node<std::tuple<T0, T1> > { -private: - static const int N = 2; -public: - typedef std::tuple<T0, T1> InputTuple; - typedef tagged_msg<size_t, T0, T1> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -template<typename T0, typename T1, typename T2> -class indexer_node<T0, T1, T2> : public unfolded_indexer_node<std::tuple<T0, T1, T2> > { -private: - static const int N = 3; -public: - typedef std::tuple<T0, T1, T2> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -template<typename T0, typename T1, typename T2, typename T3> -class indexer_node<T0, T1, T2, T3> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3> > { -private: - static const int N = 4; -public: - typedef std::tuple<T0, T1, T2, T3> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -template<typename T0, typename T1, typename T2, typename T3, typename T4> -class indexer_node<T0, T1, T2, T3, T4> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4> > { -private: - static const int N = 5; -public: - typedef std::tuple<T0, T1, T2, T3, T4> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; - -#if __TBB_VARIADIC_MAX >= 6 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> -class indexer_node<T0, T1, T2, T3, T4, T5> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5> > { -private: - static const int N = 6; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 6 - -#if __TBB_VARIADIC_MAX >= 7 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, - typename T6> -class indexer_node<T0, T1, T2, T3, T4, T5, T6> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6> > { -private: - static const int N = 7; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5, T6> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 7 - -#if __TBB_VARIADIC_MAX >= 8 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, - typename T6, typename T7> -class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> > { -private: - static const int N = 8; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 8 - -#if __TBB_VARIADIC_MAX >= 9 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8> -class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7, T8> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> > { -private: - static const int N = 9; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... 
Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 9 - -#if __TBB_VARIADIC_MAX >= 10 -template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, typename T9> -class indexer_node/*default*/ : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> > { -private: - static const int N = 10; -public: - typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> InputTuple; - typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> output_type; - typedef unfolded_indexer_node<InputTuple> unfolded_type; - __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - // Copy constructor - __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { - fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, - this->input_ports(), static_cast< sender< output_type > *>(this) ); - } - -}; -#endif //variadic max 10 - -template< typename T > -inline void internal_make_edge( sender<T> &p, receiver<T> &s ) { - register_successor(p, s); - fgt_make_edge( &p, &s ); -} - -//! Makes an edge between a single predecessor and a single successor -template< typename T > -inline void make_edge( sender<T> &p, receiver<T> &s ) { - internal_make_edge( p, s ); -} - -//Makes an edge from port 0 of a multi-output predecessor to port 0 of a multi-input successor. -template< typename T, typename V, - typename = typename T::output_ports_type, typename = typename V::input_ports_type > -inline void make_edge( T& output, V& input) { - make_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); -} - -//Makes an edge from port 0 of a multi-output predecessor to a receiver. -template< typename T, typename R, - typename = typename T::output_ports_type > -inline void make_edge( T& output, receiver<R>& input) { - make_edge(std::get<0>(output.output_ports()), input); -} - -//Makes an edge from a sender to port 0 of a multi-input successor. -template< typename S, typename V, - typename = typename V::input_ports_type > -inline void make_edge( sender<S>& output, V& input) { - make_edge(output, std::get<0>(input.input_ports())); -} - -template< typename T > -inline void internal_remove_edge( sender<T> &p, receiver<T> &s ) { - remove_successor( p, s ); - fgt_remove_edge( &p, &s ); -} - -//! Removes an edge between a single predecessor and a single successor -template< typename T > -inline void remove_edge( sender<T> &p, receiver<T> &s ) { - internal_remove_edge( p, s ); -} - -//Removes an edge between port 0 of a multi-output predecessor and port 0 of a multi-input successor. 
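// A small sketch of the edge helpers above (assuming oneapi/tbb/flow_graph.h is
// included): edges can be made and removed while the graph exists, and the port-0
// convenience overloads (such as the remove_edge form declared next) let multi-port
// nodes be wired without spelling out std::get. Names and values are illustrative.

void edge_usage_sketch() {
    using namespace tbb::flow;
    graph g;
    broadcast_node<int> source(g);
    function_node<int> sink(g, serial, [](int) { return continue_msg{}; });
    make_edge(source, sink);     // sender<int> -> receiver<int>
    source.try_put(1);           // forwarded to sink
    g.wait_for_all();
    remove_edge(source, sink);
    source.try_put(2);           // dropped: broadcast_node does not buffer
    g.wait_for_all();
}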
-template< typename T, typename V, - typename = typename T::output_ports_type, typename = typename V::input_ports_type > -inline void remove_edge( T& output, V& input) { - remove_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); -} - -//Removes an edge between port 0 of a multi-output predecessor and a receiver. -template< typename T, typename R, - typename = typename T::output_ports_type > -inline void remove_edge( T& output, receiver<R>& input) { - remove_edge(std::get<0>(output.output_ports()), input); -} -//Removes an edge between a sender and port 0 of a multi-input successor. -template< typename S, typename V, - typename = typename V::input_ports_type > -inline void remove_edge( sender<S>& output, V& input) { - remove_edge(output, std::get<0>(input.input_ports())); -} - -//! Returns a copy of the body from a function or continue node -template< typename Body, typename Node > -Body copy_body( Node &n ) { - return n.template copy_function_object<Body>(); -} - -//composite_node -template< typename InputTuple, typename OutputTuple > class composite_node; - -template< typename... InputTypes, typename... OutputTypes> -class composite_node <std::tuple<InputTypes...>, std::tuple<OutputTypes...> > : public graph_node { - -public: - typedef std::tuple< receiver<InputTypes>&... > input_ports_type; - typedef std::tuple< sender<OutputTypes>&... > output_ports_type; - -private: - std::unique_ptr<input_ports_type> my_input_ports; - std::unique_ptr<output_ports_type> my_output_ports; - - static const size_t NUM_INPUTS = sizeof...(InputTypes); - static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); - -protected: - void reset_node(reset_flags) override {} - -public: - composite_node( graph &g ) : graph_node(g) { - fgt_multiinput_multioutput_node( CODEPTR(), FLOW_COMPOSITE_NODE, this, &this->my_graph ); - } - - template<typename T1, typename T2> - void set_external_ports(T1&& input_ports_tuple, T2&& output_ports_tuple) { - static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); - static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); - - fgt_internal_input_alias_helper<T1, NUM_INPUTS>::alias_port( this, input_ports_tuple); - fgt_internal_output_alias_helper<T2, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); - - my_input_ports.reset( new input_ports_type(std::forward<T1>(input_ports_tuple)) ); - my_output_ports.reset( new output_ports_type(std::forward<T2>(output_ports_tuple)) ); - } - - template< typename... NodeTypes > - void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } - - template< typename... NodeTypes > - void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } - - - input_ports_type& input_ports() { - __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); - return *my_input_ports; - } - - output_ports_type& output_ports() { - __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); - return *my_output_ports; - } -}; // class composite_node - -//composite_node with only input ports -template< typename... InputTypes> -class composite_node <std::tuple<InputTypes...>, std::tuple<> > : public graph_node { -public: - typedef std::tuple< receiver<InputTypes>&... 
> input_ports_type; - -private: - std::unique_ptr<input_ports_type> my_input_ports; - static const size_t NUM_INPUTS = sizeof...(InputTypes); - -protected: - void reset_node(reset_flags) override {} - -public: - composite_node( graph &g ) : graph_node(g) { - fgt_composite( CODEPTR(), this, &g ); - } - - template<typename T> - void set_external_ports(T&& input_ports_tuple) { - static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); - - fgt_internal_input_alias_helper<T, NUM_INPUTS>::alias_port( this, input_ports_tuple); - - my_input_ports.reset( new input_ports_type(std::forward<T>(input_ports_tuple)) ); - } - - template< typename... NodeTypes > - void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } - - template< typename... NodeTypes > - void add_nodes( const NodeTypes&... n) { add_nodes_impl(this, false, n...); } - - - input_ports_type& input_ports() { - __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); - return *my_input_ports; - } - -}; // class composite_node - -//composite_nodes with only output_ports -template<typename... OutputTypes> -class composite_node <std::tuple<>, std::tuple<OutputTypes...> > : public graph_node { -public: - typedef std::tuple< sender<OutputTypes>&... > output_ports_type; - -private: - std::unique_ptr<output_ports_type> my_output_ports; - static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); - -protected: - void reset_node(reset_flags) override {} - -public: - __TBB_NOINLINE_SYM composite_node( graph &g ) : graph_node(g) { - fgt_composite( CODEPTR(), this, &g ); - } - - template<typename T> - void set_external_ports(T&& output_ports_tuple) { - static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); - - fgt_internal_output_alias_helper<T, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); - - my_output_ports.reset( new output_ports_type(std::forward<T>(output_ports_tuple)) ); - } - - template<typename... NodeTypes > - void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } - - template<typename... NodeTypes > - void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } - - - output_ports_type& output_ports() { - __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); - return *my_output_ports; - } - -}; // class composite_node - -template<typename Gateway> -class async_body_base: no_assign { -public: - typedef Gateway gateway_type; - - async_body_base(gateway_type *gateway): my_gateway(gateway) { } - void set_gateway(gateway_type *gateway) { - my_gateway = gateway; - } - -protected: - gateway_type *my_gateway; -}; - -template<typename Input, typename Ports, typename Gateway, typename Body> -class async_body: public async_body_base<Gateway> { -public: - typedef async_body_base<Gateway> base_type; - typedef Gateway gateway_type; - - async_body(const Body &body, gateway_type *gateway) - : base_type(gateway), my_body(body) { } - - void operator()( const Input &v, Ports & ) { - my_body(v, *this->my_gateway); - } - - Body get_body() { return my_body; } - -private: - Body my_body; -}; - -//! 
Implements async node -template < typename Input, typename Output, - typename Policy = queueing_lightweight > -class async_node - : public multifunction_node< Input, std::tuple< Output >, Policy >, public sender< Output > -{ - typedef multifunction_node< Input, std::tuple< Output >, Policy > base_type; - typedef multifunction_input< - Input, typename base_type::output_ports_type, Policy, cache_aligned_allocator<Input>> mfn_input_type; - -public: - typedef Input input_type; - typedef Output output_type; - typedef receiver<input_type> receiver_type; - typedef receiver<output_type> successor_type; - typedef sender<input_type> predecessor_type; - typedef receiver_gateway<output_type> gateway_type; - typedef async_body_base<gateway_type> async_body_base_type; - typedef typename base_type::output_ports_type output_ports_type; - -private: - class receiver_gateway_impl: public receiver_gateway<Output> { - public: - receiver_gateway_impl(async_node* node): my_node(node) {} - void reserve_wait() override { - fgt_async_reserve(static_cast<typename async_node::receiver_type *>(my_node), &my_node->my_graph); - my_node->my_graph.reserve_wait(); - } - - void release_wait() override { - async_node* n = my_node; - graph* g = &n->my_graph; - g->release_wait(); - fgt_async_commit(static_cast<typename async_node::receiver_type *>(n), g); - } - - //! Implements gateway_type::try_put for an external activity to submit a message to FG - bool try_put(const Output &i) override { - return my_node->try_put_impl(i); - } - - private: - async_node* my_node; - } my_gateway; - - //The substitute of 'this' for member construction, to prevent compiler warnings - async_node* self() { return this; } - - //! Implements gateway_type::try_put for an external activity to submit a message to FG - bool try_put_impl(const Output &i) { - multifunction_output<Output> &port_0 = output_port<0>(*this); - broadcast_cache<output_type>& port_successors = port_0.successors(); - fgt_async_try_put_begin(this, &port_0); - // TODO revamp: change to std::list<graph_task*> - graph_task_list tasks; - bool is_at_least_one_put_successful = port_successors.gather_successful_try_puts(i, tasks); - __TBB_ASSERT( is_at_least_one_put_successful || tasks.empty(), - "Return status is inconsistent with the method operation." ); - - while( !tasks.empty() ) { - enqueue_in_graph_arena(this->my_graph, tasks.pop_front()); - } - fgt_async_try_put_end(this, &port_0); - return is_at_least_one_put_successful; - } - -public: - template<typename Body> - __TBB_NOINLINE_SYM async_node( - graph &g, size_t concurrency, - Body body, Policy = Policy(), node_priority_t a_priority = no_priority - ) : base_type( - g, concurrency, - async_body<Input, typename base_type::output_ports_type, gateway_type, Body> - (body, &my_gateway), a_priority ), my_gateway(self()) { - fgt_multioutput_node_with_body<1>( - CODEPTR(), FLOW_ASYNC_NODE, - &this->my_graph, static_cast<receiver<input_type> *>(this), - this->output_ports(), this->my_body - ); - } - - template <typename Body, typename... Args> - __TBB_NOINLINE_SYM async_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) - : async_node(g, concurrency, body, Policy(), a_priority) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename Body, typename... 
Args> - __TBB_NOINLINE_SYM async_node( - const node_set<Args...>& nodes, size_t concurrency, Body body, - Policy = Policy(), node_priority_t a_priority = no_priority ) - : async_node(nodes.graph_reference(), concurrency, body, a_priority) { - make_edges_in_order(nodes, *this); - } - - template <typename Body, typename... Args> - __TBB_NOINLINE_SYM async_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) - : async_node(nodes, concurrency, body, Policy(), a_priority) {} -#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - - __TBB_NOINLINE_SYM async_node( const async_node &other ) : base_type(other), sender<Output>(), my_gateway(self()) { - static_cast<async_body_base_type*>(this->my_body->get_body_ptr())->set_gateway(&my_gateway); - static_cast<async_body_base_type*>(this->my_init_body->get_body_ptr())->set_gateway(&my_gateway); - - fgt_multioutput_node_with_body<1>( CODEPTR(), FLOW_ASYNC_NODE, - &this->my_graph, static_cast<receiver<input_type> *>(this), - this->output_ports(), this->my_body ); - } - - gateway_type& gateway() { - return my_gateway; - } - - // Define sender< Output > - - //! Add a new successor to this node - bool register_successor(successor_type&) override { - __TBB_ASSERT(false, "Successors must be registered only via ports"); - return false; - } - - //! Removes a successor from this node - bool remove_successor(successor_type&) override { - __TBB_ASSERT(false, "Successors must be removed only via ports"); - return false; - } - - template<typename Body> - Body copy_function_object() { - typedef multifunction_body<input_type, typename base_type::output_ports_type> mfn_body_type; - typedef async_body<Input, typename base_type::output_ports_type, gateway_type, Body> async_body_type; - mfn_body_type &body_ref = *this->my_body; - async_body_type ab = *static_cast<async_body_type*>(dynamic_cast< multifunction_body_leaf<input_type, typename base_type::output_ports_type, async_body_type> & >(body_ref).get_body_ptr()); - return ab.get_body(); - } - -protected: - - void reset_node( reset_flags f) override { - base_type::reset_node(f); - } -}; - -#include "detail/_flow_graph_node_set_impl.h" - -template< typename T > -class overwrite_node : public graph_node, public receiver<T>, public sender<T> { -public: - typedef T input_type; - typedef T output_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - __TBB_NOINLINE_SYM explicit overwrite_node(graph &g) - : graph_node(g), my_successors(this), my_buffer_is_valid(false) - { - fgt_node( CODEPTR(), FLOW_OVERWRITE_NODE, &this->my_graph, - static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - overwrite_node(const node_set<Args...>& nodes) : overwrite_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor; doesn't take anything from src; default won't work - __TBB_NOINLINE_SYM overwrite_node( const overwrite_node& src ) : overwrite_node(src.my_graph) {} - - ~overwrite_node() {} - - bool register_successor( successor_type &s ) override { - spin_mutex::scoped_lock l( my_mutex ); - if (my_buffer_is_valid && is_graph_active( my_graph )) { - // We have a valid value that must be forwarded immediately. 
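            // Offer the buffered value to the newly connected successor right away; if
            // the put is rejected (e.g. the successor currently pulls via reservation),
            // the register_predecessor_task created below finishes the registration
            // asynchronously instead of spinning here.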
- bool ret = s.try_put( my_buffer ); - if ( ret ) { - // We add the successor that accepted our put - my_successors.register_successor( s ); - } else { - // In case of reservation a race between the moment of reservation and register_successor can appear, - // because failed reserve does not mean that register_successor is not ready to put a message immediately. - // We have some sort of infinite loop: reserving node tries to set pull state for the edge, - // but overwrite_node tries to return push state back. That is why we have to break this loop with task creation. - small_object_allocator allocator{}; - typedef register_predecessor_task task_type; - graph_task* t = allocator.new_object<task_type>(graph_reference(), allocator, *this, s); - graph_reference().reserve_wait(); - spawn_in_graph_arena( my_graph, *t ); - } - } else { - // No valid value yet, just add as successor - my_successors.register_successor( s ); - } - return true; - } - - bool remove_successor( successor_type &s ) override { - spin_mutex::scoped_lock l( my_mutex ); - my_successors.remove_successor(s); - return true; - } - - bool try_get( input_type &v ) override { - spin_mutex::scoped_lock l( my_mutex ); - if ( my_buffer_is_valid ) { - v = my_buffer; - return true; - } - return false; - } - - //! Reserves an item - bool try_reserve( T &v ) override { - return try_get(v); - } - - //! Releases the reserved item - bool try_release() override { return true; } - - //! Consumes the reserved item - bool try_consume() override { return true; } - - bool is_valid() { - spin_mutex::scoped_lock l( my_mutex ); - return my_buffer_is_valid; - } - - void clear() { - spin_mutex::scoped_lock l( my_mutex ); - my_buffer_is_valid = false; - } - -protected: - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task( const input_type &v ) override { - spin_mutex::scoped_lock l( my_mutex ); - return try_put_task_impl(v); - } - - graph_task * try_put_task_impl(const input_type &v) { - my_buffer = v; - my_buffer_is_valid = true; - graph_task* rtask = my_successors.try_put_task(v); - if (!rtask) rtask = SUCCESSFULLY_ENQUEUED; - return rtask; - } - - graph& graph_reference() const override { - return my_graph; - } - - //! 
Breaks an infinite loop between the node reservation and register_successor call - struct register_predecessor_task : public graph_task { - register_predecessor_task( - graph& g, small_object_allocator& allocator, predecessor_type& owner, successor_type& succ) - : graph_task(g, allocator), o(owner), s(succ) {}; - - task* execute(execution_data& ed) override { - // TODO revamp: investigate why qualification is needed for register_successor() call - using tbb::detail::d1::register_predecessor; - using tbb::detail::d1::register_successor; - if ( !register_predecessor(s, o) ) { - register_successor(o, s); - } - finalize(ed); - return nullptr; - } - - predecessor_type& o; - successor_type& s; - }; - - spin_mutex my_mutex; - broadcast_cache< input_type, null_rw_mutex > my_successors; - input_type my_buffer; - bool my_buffer_is_valid; - - void reset_node( reset_flags f) override { - my_buffer_is_valid = false; - if (f&rf_clear_edges) { - my_successors.clear(); - } - } -}; // overwrite_node - -template< typename T > -class write_once_node : public overwrite_node<T> { -public: - typedef T input_type; - typedef T output_type; - typedef overwrite_node<T> base_type; - typedef typename receiver<input_type>::predecessor_type predecessor_type; - typedef typename sender<output_type>::successor_type successor_type; - - //! Constructor - __TBB_NOINLINE_SYM explicit write_once_node(graph& g) : base_type(g) { - fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template <typename... Args> - write_once_node(const node_set<Args...>& nodes) : write_once_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - - //! Copy constructor: call base class copy constructor - __TBB_NOINLINE_SYM write_once_node( const write_once_node& src ) : base_type(src) { - fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), - static_cast<receiver<input_type> *>(this), - static_cast<sender<output_type> *>(this) ); - } - -protected: - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - graph_task *try_put_task( const T &v ) override { - spin_mutex::scoped_lock l( this->my_mutex ); - return this->my_buffer_is_valid ? 
NULL : this->try_put_task_impl(v); - } -}; // write_once_node - -inline void set_name(const graph& g, const char *name) { - fgt_graph_desc(&g, name); -} - -template <typename Output> -inline void set_name(const input_node<Output>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename Input, typename Output, typename Policy> -inline void set_name(const function_node<Input, Output, Policy>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename Output, typename Policy> -inline void set_name(const continue_node<Output,Policy>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const broadcast_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const buffer_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const queue_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const sequencer_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T, typename Compare> -inline void set_name(const priority_queue_node<T, Compare>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T, typename DecrementType> -inline void set_name(const limiter_node<T, DecrementType>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename OutputTuple, typename JP> -inline void set_name(const join_node<OutputTuple, JP>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename... Types> -inline void set_name(const indexer_node<Types...>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const overwrite_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template <typename T> -inline void set_name(const write_once_node<T>& node, const char *name) { - fgt_node_desc(&node, name); -} - -template<typename Input, typename Output, typename Policy> -inline void set_name(const multifunction_node<Input, Output, Policy>& node, const char *name) { - fgt_multioutput_node_desc(&node, name); -} - -template<typename TupleType> -inline void set_name(const split_node<TupleType>& node, const char *name) { - fgt_multioutput_node_desc(&node, name); -} - -template< typename InputTuple, typename OutputTuple > -inline void set_name(const composite_node<InputTuple, OutputTuple>& node, const char *name) { - fgt_multiinput_multioutput_node_desc(&node, name); -} - -template<typename Input, typename Output, typename Policy> -inline void set_name(const async_node<Input, Output, Policy>& node, const char *name) -{ - fgt_multioutput_node_desc(&node, name); -} -} // d1 -} // detail -} // tbb - - -// Include deduction guides for node classes -#include "detail/_flow_graph_nodes_deduction.h" - -namespace tbb { -namespace flow { -inline namespace v1 { - using detail::d1::receiver; - using detail::d1::sender; - - using detail::d1::serial; - using detail::d1::unlimited; - - using detail::d1::reset_flags; - using detail::d1::rf_reset_protocol; - using detail::d1::rf_reset_bodies; - using detail::d1::rf_clear_edges; - - using detail::d1::graph; - using detail::d1::graph_node; - using detail::d1::continue_msg; - - using detail::d1::input_node; - using detail::d1::function_node; - using detail::d1::multifunction_node; - using detail::d1::split_node; - using detail::d1::output_port; - 
using detail::d1::indexer_node; - using detail::d1::tagged_msg; - using detail::d1::cast_to; - using detail::d1::is_a; - using detail::d1::continue_node; - using detail::d1::overwrite_node; - using detail::d1::write_once_node; - using detail::d1::broadcast_node; - using detail::d1::buffer_node; - using detail::d1::queue_node; - using detail::d1::sequencer_node; - using detail::d1::priority_queue_node; - using detail::d1::limiter_node; - using namespace detail::d1::graph_policy_namespace; - using detail::d1::join_node; - using detail::d1::input_port; - using detail::d1::copy_body; - using detail::d1::make_edge; - using detail::d1::remove_edge; - using detail::d1::tag_value; - using detail::d1::composite_node; - using detail::d1::async_node; - using detail::d1::node_priority_t; - using detail::d1::no_priority; - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - using detail::d1::follows; - using detail::d1::precedes; - using detail::d1::make_node_set; - using detail::d1::make_edges; -#endif - -} // v1 -} // flow - - using detail::d1::flow_control; - -namespace profiling { - using detail::d1::set_name; -} // profiling - -} // tbb - - -#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) - // We don't do pragma pop here, since it still gives warning on the USER side - #undef __TBB_NOINLINE_SYM -#endif - -#endif // __TBB_flow_graph_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_H +#define __TBB_flow_graph_H + +#include <atomic> +#include <memory> +#include <type_traits> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "spin_mutex.h" +#include "null_mutex.h" +#include "spin_rw_mutex.h" +#include "null_rw_mutex.h" +#include "detail/_pipeline_filters.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" +#include "cache_aligned_allocator.h" +#include "detail/_exception.h" +#include "detail/_template_helpers.h" +#include "detail/_aggregator.h" +#include "detail/_allocator_traits.h" +#include "profiling.h" +#include "task_arena.h" + +#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) + #if __INTEL_COMPILER + // Disabled warning "routine is both inline and noinline" + #pragma warning (push) + #pragma warning( disable: 2196 ) + #endif + #define __TBB_NOINLINE_SYM __attribute__((noinline)) +#else + #define __TBB_NOINLINE_SYM +#endif + +#include <tuple> +#include <list> +#include <queue> + +/** @file + \brief The graph related classes and functions + + There are some applications that best express dependencies as messages + passed between nodes in a graph. These messages may contain data or + simply act as signals that a predecessors has completed. The graph + class and its associated node classes can be used to express such + applications. +*/ + +namespace tbb { +namespace detail { + +namespace d1 { + +//! An enumeration the provides the two most common concurrency levels: unlimited and serial +enum concurrency { unlimited = 0, serial = 1 }; + +//! 
A generic null type +struct null_type {}; + +//! An empty class used for messages that mean "I'm done" +class continue_msg {}; + +//! Forward declaration section +template< typename T > class sender; +template< typename T > class receiver; +class continue_receiver; + +template< typename T, typename U > class limiter_node; // needed for resetting decrementer + +template<typename T, typename M> class successor_cache; +template<typename T, typename M> class broadcast_cache; +template<typename T, typename M> class round_robin_cache; +template<typename T, typename M> class predecessor_cache; +template<typename T, typename M> class reservable_predecessor_cache; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +namespace order { +struct following; +struct preceding; +} +template<typename Order, typename... Args> struct node_set; +#endif + + +} // namespace d1 +} // namespace detail +} // namespace tbb + +//! The graph class +#include "detail/_flow_graph_impl.h" + +namespace tbb { +namespace detail { +namespace d1 { + +static inline std::pair<graph_task*, graph_task*> order_tasks(graph_task* first, graph_task* second) { + if (second->priority > first->priority) + return std::make_pair(second, first); + return std::make_pair(first, second); +} + +// submit task if necessary. Returns the non-enqueued task if there is one. +static inline graph_task* combine_tasks(graph& g, graph_task* left, graph_task* right) { + // if no RHS task, don't change left. + if (right == NULL) return left; + // right != NULL + if (left == NULL) return right; + if (left == SUCCESSFULLY_ENQUEUED) return right; + // left contains a task + if (right != SUCCESSFULLY_ENQUEUED) { + // both are valid tasks + auto tasks_pair = order_tasks(left, right); + spawn_in_graph_arena(g, *tasks_pair.first); + return tasks_pair.second; + } + return left; +} + +//! Pure virtual template class that defines a sender of messages of type T +template< typename T > +class sender { +public: + virtual ~sender() {} + + //! Request an item from the sender + virtual bool try_get( T & ) { return false; } + + //! Reserves an item in the sender + virtual bool try_reserve( T & ) { return false; } + + //! Releases the reserved item + virtual bool try_release( ) { return false; } + + //! Consumes the reserved item + virtual bool try_consume( ) { return false; } + +protected: + //! The output type of this sender + typedef T output_type; + + //! The successor type for this node + typedef receiver<T> successor_type; + + //! Add a new successor to this node + virtual bool register_successor( successor_type &r ) = 0; + + //! Removes a successor from this node + virtual bool remove_successor( successor_type &r ) = 0; + + template<typename C> + friend bool register_successor(sender<C>& s, receiver<C>& r); + + template<typename C> + friend bool remove_successor (sender<C>& s, receiver<C>& r); +}; // class sender<T> + +template<typename C> +bool register_successor(sender<C>& s, receiver<C>& r) { + return s.register_successor(r); +} + +template<typename C> +bool remove_successor(sender<C>& s, receiver<C>& r) { + return s.remove_successor(r); +} + +//! Pure virtual template class that defines a receiver of messages of type T +template< typename T > +class receiver { +public: + //! Destructor + virtual ~receiver() {} + + //! Put an item to the receiver + bool try_put( const T& t ) { + graph_task *res = try_put_task(t); + if (!res) return false; + if (res != SUCCESSFULLY_ENQUEUED) spawn_in_graph_arena(graph_reference(), *res); + return true; + } + + //! 
put item to successor; return task to run the successor if possible. +protected: + //! The input type of this receiver + typedef T input_type; + + //! The predecessor type for this node + typedef sender<T> predecessor_type; + + template< typename R, typename B > friend class run_and_put_task; + template< typename X, typename Y > friend class broadcast_cache; + template< typename X, typename Y > friend class round_robin_cache; + virtual graph_task *try_put_task(const T& t) = 0; + virtual graph& graph_reference() const = 0; + + template<typename TT, typename M> friend class successor_cache; + virtual bool is_continue_receiver() { return false; } + + // TODO revamp: reconsider the inheritance and move node priority out of receiver + virtual node_priority_t priority() const { return no_priority; } + + //! Add a predecessor to the node + virtual bool register_predecessor( predecessor_type & ) { return false; } + + //! Remove a predecessor from the node + virtual bool remove_predecessor( predecessor_type & ) { return false; } + + template <typename C> + friend bool register_predecessor(receiver<C>& r, sender<C>& s); + template <typename C> + friend bool remove_predecessor (receiver<C>& r, sender<C>& s); +}; // class receiver<T> + +template <typename C> +bool register_predecessor(receiver<C>& r, sender<C>& s) { + return r.register_predecessor(s); +} + +template <typename C> +bool remove_predecessor(receiver<C>& r, sender<C>& s) { + return r.remove_predecessor(s); +} + +//! Base class for receivers of completion messages +/** These receivers automatically reset, but cannot be explicitly waited on */ +class continue_receiver : public receiver< continue_msg > { +protected: + + //! Constructor + explicit continue_receiver( int number_of_predecessors, node_priority_t a_priority ) { + my_predecessor_count = my_initial_predecessor_count = number_of_predecessors; + my_current_count = 0; + my_priority = a_priority; + } + + //! Copy constructor + continue_receiver( const continue_receiver& src ) : receiver<continue_msg>() { + my_predecessor_count = my_initial_predecessor_count = src.my_initial_predecessor_count; + my_current_count = 0; + my_priority = src.my_priority; + } + + //! Increments the trigger threshold + bool register_predecessor( predecessor_type & ) override { + spin_mutex::scoped_lock l(my_mutex); + ++my_predecessor_count; + return true; + } + + //! Decrements the trigger threshold + /** Does not check to see if the removal of the predecessor now makes the current count + exceed the new threshold. So removing a predecessor while the graph is active can cause + unexpected results. */ + bool remove_predecessor( predecessor_type & ) override { + spin_mutex::scoped_lock l(my_mutex); + --my_predecessor_count; + return true; + } + + //! The input type + typedef continue_msg input_type; + + //! The predecessor type for this node + typedef receiver<input_type>::predecessor_type predecessor_type; + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + // execute body is supposed to be too small to create a task for. + graph_task* try_put_task( const input_type & ) override { + { + spin_mutex::scoped_lock l(my_mutex); + if ( ++my_current_count < my_predecessor_count ) + return SUCCESSFULLY_ENQUEUED; + else + my_current_count = 0; + } + graph_task* res = execute(); + return res? 
res : SUCCESSFULLY_ENQUEUED; + } + + spin_mutex my_mutex; + int my_predecessor_count; + int my_current_count; + int my_initial_predecessor_count; + node_priority_t my_priority; + // the friend declaration in the base class did not eliminate the "protected class" + // error in gcc 4.1.2 + template<typename U, typename V> friend class limiter_node; + + virtual void reset_receiver( reset_flags f ) { + my_current_count = 0; + if (f & rf_clear_edges) { + my_predecessor_count = my_initial_predecessor_count; + } + } + + //! Does whatever should happen when the threshold is reached + /** This should be very fast or else spawn a task. This is + called while the sender is blocked in the try_put(). */ + virtual graph_task* execute() = 0; + template<typename TT, typename M> friend class successor_cache; + bool is_continue_receiver() override { return true; } + + node_priority_t priority() const override { return my_priority; } +}; // class continue_receiver + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + template <typename K, typename T> + K key_from_message( const T &t ) { + return t.key(); + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + +} // d1 +} // detail +} // tbb + +#include "detail/_flow_graph_trace_impl.h" +#include "detail/_hash_compare.h" + +namespace tbb { +namespace detail { +namespace d1 { + +#include "detail/_flow_graph_body_impl.h" +#include "detail/_flow_graph_cache_impl.h" +#include "detail/_flow_graph_types_impl.h" + +using namespace graph_policy_namespace; + +template <typename C, typename N> +graph_iterator<C,N>::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL) +{ + if (begin) current_node = my_graph->my_nodes; + //else it is an end iterator by default +} + +template <typename C, typename N> +typename graph_iterator<C,N>::reference graph_iterator<C,N>::operator*() const { + __TBB_ASSERT(current_node, "graph_iterator at end"); + return *operator->(); +} + +template <typename C, typename N> +typename graph_iterator<C,N>::pointer graph_iterator<C,N>::operator->() const { + return current_node; +} + +template <typename C, typename N> +void graph_iterator<C,N>::internal_forward() { + if (current_node) current_node = current_node->next; +} + +//! 
Constructs a graph with isolated task_group_context +inline graph::graph() : my_wait_context(0), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { + prepare_task_arena(); + own_context = true; + cancelled = false; + caught_exception = false; + my_context = new (r1::cache_aligned_allocate(sizeof(task_group_context))) task_group_context(FLOW_TASKS); + fgt_graph(this); + my_is_active = true; +} + +inline graph::graph(task_group_context& use_this_context) : + my_wait_context(0), my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { + prepare_task_arena(); + own_context = false; + cancelled = false; + caught_exception = false; + fgt_graph(this); + my_is_active = true; +} + +inline graph::~graph() { + wait_for_all(); + if (own_context) { + my_context->~task_group_context(); + r1::cache_aligned_deallocate(my_context); + } + delete my_task_arena; +} + +inline void graph::reserve_wait() { + my_wait_context.reserve(); + fgt_reserve_wait(this); +} + +inline void graph::release_wait() { + fgt_release_wait(this); + my_wait_context.release(); +} + +inline void graph::register_node(graph_node *n) { + n->next = NULL; + { + spin_mutex::scoped_lock lock(nodelist_mutex); + n->prev = my_nodes_last; + if (my_nodes_last) my_nodes_last->next = n; + my_nodes_last = n; + if (!my_nodes) my_nodes = n; + } +} + +inline void graph::remove_node(graph_node *n) { + { + spin_mutex::scoped_lock lock(nodelist_mutex); + __TBB_ASSERT(my_nodes && my_nodes_last, "graph::remove_node: Error: no registered nodes"); + if (n->prev) n->prev->next = n->next; + if (n->next) n->next->prev = n->prev; + if (my_nodes_last == n) my_nodes_last = n->prev; + if (my_nodes == n) my_nodes = n->next; + } + n->prev = n->next = NULL; +} + +inline void graph::reset( reset_flags f ) { + // reset context + deactivate_graph(*this); + + my_context->reset(); + cancelled = false; + caught_exception = false; + // reset all the nodes comprising the graph + for(iterator ii = begin(); ii != end(); ++ii) { + graph_node *my_p = &(*ii); + my_p->reset_node(f); + } + // Reattach the arena. Might be useful to run the graph in a particular task_arena + // while not limiting graph lifetime to a single task_arena::execute() call. + prepare_task_arena( /*reinit=*/true ); + activate_graph(*this); +} + +inline void graph::cancel() { + my_context->cancel_group_execution(); +} + +inline graph::iterator graph::begin() { return iterator(this, true); } + +inline graph::iterator graph::end() { return iterator(this, false); } + +inline graph::const_iterator graph::begin() const { return const_iterator(this, true); } + +inline graph::const_iterator graph::end() const { return const_iterator(this, false); } + +inline graph::const_iterator graph::cbegin() const { return const_iterator(this, true); } + +inline graph::const_iterator graph::cend() const { return const_iterator(this, false); } + +inline graph_node::graph_node(graph& g) : my_graph(g) { + my_graph.register_node(this); +} + +inline graph_node::~graph_node() { + my_graph.remove_node(this); +} + +#include "detail/_flow_graph_node_impl.h" + + +//! An executable node that acts as a source, i.e. it has no predecessors + +template < typename Output > +class input_node : public graph_node, public sender< Output > { +public: + //! The type of the output message, which is complete + typedef Output output_type; + + //! 
The type of successors of this node + typedef typename sender<output_type>::successor_type successor_type; + + // Input node has no input type + typedef null_type input_type; + + //! Constructor for a node with a successor + template< typename Body > + __TBB_NOINLINE_SYM input_node( graph &g, Body body ) + : graph_node(g), my_active(false) + , my_body( new input_body_leaf< output_type, Body>(body) ) + , my_init_body( new input_body_leaf< output_type, Body>(body) ) + , my_successors(this), my_reserved(false), my_has_cached_item(false) + { + fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, + static_cast<sender<output_type> *>(this), this->my_body); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Successors> + input_node( const node_set<order::preceding, Successors...>& successors, Body body ) + : input_node(successors.graph_reference(), body) + { + make_edges(*this, successors); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM input_node( const input_node& src ) + : graph_node(src.my_graph), sender<Output>() + , my_active(false) + , my_body(src.my_init_body->clone()), my_init_body(src.my_init_body->clone()) + , my_successors(this), my_reserved(false), my_has_cached_item(false) + { + fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, + static_cast<sender<output_type> *>(this), this->my_body); + } + + //! The destructor + ~input_node() { delete my_body; delete my_init_body; } + + //! Add a new successor to this node + bool register_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_successors.register_successor(r); + if ( my_active ) + spawn_put(); + return true; + } + + //! Removes a successor from this node + bool remove_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_successors.remove_successor(r); + return true; + } + + //! Request an item from the node + bool try_get( output_type &v ) override { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) + return false; + + if ( my_has_cached_item ) { + v = my_cached_item; + my_has_cached_item = false; + return true; + } + // we've been asked to provide an item, but we have none. enqueue a task to + // provide one. + if ( my_active ) + spawn_put(); + return false; + } + + //! Reserves an item. + bool try_reserve( output_type &v ) override { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) { + return false; + } + + if ( my_has_cached_item ) { + v = my_cached_item; + my_reserved = true; + return true; + } else { + return false; + } + } + + //! Release a reserved item. + /** true = item has been released and so remains in sender, dest must request or reserve future items */ + bool try_release( ) override { + spin_mutex::scoped_lock lock(my_mutex); + __TBB_ASSERT( my_reserved && my_has_cached_item, "releasing non-existent reservation" ); + my_reserved = false; + if(!my_successors.empty()) + spawn_put(); + return true; + } + + //! Consumes a reserved item + bool try_consume( ) override { + spin_mutex::scoped_lock lock(my_mutex); + __TBB_ASSERT( my_reserved && my_has_cached_item, "consuming non-existent reservation" ); + my_reserved = false; + my_has_cached_item = false; + if ( !my_successors.empty() ) { + spawn_put(); + } + return true; + } + + //! 
Activates a node that was created in the inactive state + void activate() { + spin_mutex::scoped_lock lock(my_mutex); + my_active = true; + if (!my_successors.empty()) + spawn_put(); + } + + template<typename Body> + Body copy_function_object() { + input_body<output_type> &body_ref = *this->my_body; + return dynamic_cast< input_body_leaf<output_type, Body> & >(body_ref).get_body(); + } + +protected: + + //! resets the input_node to its initial state + void reset_node( reset_flags f) override { + my_active = false; + my_reserved = false; + my_has_cached_item = false; + + if(f & rf_clear_edges) my_successors.clear(); + if(f & rf_reset_bodies) { + input_body<output_type> *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + +private: + spin_mutex my_mutex; + bool my_active; + input_body<output_type> *my_body; + input_body<output_type> *my_init_body; + broadcast_cache< output_type > my_successors; + bool my_reserved; + bool my_has_cached_item; + output_type my_cached_item; + + // used by apply_body_bypass, can invoke body of node. + bool try_reserve_apply_body(output_type &v) { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) { + return false; + } + if ( !my_has_cached_item ) { + flow_control control; + + fgt_begin_body( my_body ); + + my_cached_item = (*my_body)(control); + my_has_cached_item = !control.is_pipeline_stopped; + + fgt_end_body( my_body ); + } + if ( my_has_cached_item ) { + v = my_cached_item; + my_reserved = true; + return true; + } else { + return false; + } + } + + graph_task* create_put_task() { + small_object_allocator allocator{}; + typedef input_node_task_bypass< input_node<output_type> > task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + return t; + } + + //! Spawns a task that applies the body + void spawn_put( ) { + if(is_graph_active(this->my_graph)) { + spawn_in_graph_arena(this->my_graph, *create_put_task()); + } + } + + friend class input_node_task_bypass< input_node<output_type> >; + //! Applies the body. Returning SUCCESSFULLY_ENQUEUED okay; forward_task_bypass will handle it. + graph_task* apply_body_bypass( ) { + output_type v; + if ( !try_reserve_apply_body(v) ) + return NULL; + + graph_task *last_task = my_successors.try_put_task(v); + if ( last_task ) + try_consume(); + else + try_release(); + return last_task; + } +}; // class input_node + +//! Implements a function node that supports Input -> Output +template<typename Input, typename Output = continue_msg, typename Policy = queueing> +class function_node + : public graph_node + , public function_input< Input, Output, Policy, cache_aligned_allocator<Input> > + , public function_output<Output> +{ + typedef cache_aligned_allocator<Input> internals_allocator; + +public: + typedef Input input_type; + typedef Output output_type; + typedef function_input<input_type,output_type,Policy,internals_allocator> input_impl_type; + typedef function_input_queue<input_type, internals_allocator> input_queue_type; + typedef function_output<output_type> fOutput_type; + typedef typename input_impl_type::predecessor_type predecessor_type; + typedef typename fOutput_type::successor_type successor_type; + + using input_impl_type::my_predecessors; + + //! Constructor + // input_queue_type is allocated here, but destroyed in the function_input_base. + // TODO: pass the graph_buffer_policy to the function_input_base so it can all + // be done in one place. This would be an interface-breaking change. 
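    // A minimal usage sketch for the constructor that follows (illustrative only; the graph,
    // lambda, and values below are example assumptions, not part of this header):
    //
    //     tbb::flow::graph g;
    //     tbb::flow::function_node<int, int> squarer(
    //         g, tbb::flow::unlimited, [](int v) { return v * v; });
    //     squarer.try_put(3);
    //     g.wait_for_all();
    //
    // The Body is copied into the node; the concurrency argument (serial, unlimited, or an
    // explicit limit) bounds how many copies of the body may execute at the same time.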
+ template< typename Body > + __TBB_NOINLINE_SYM function_node( graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) + : graph_node(g), input_impl_type(g, concurrency, body, a_priority), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + function_node( graph& g, size_t concurrency, Body body, node_priority_t a_priority ) + : function_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority ) + : function_node(nodes.graph_reference(), concurrency, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority ) + : function_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + //! Copy constructor + __TBB_NOINLINE_SYM function_node( const function_node& src ) : + graph_node(src.my_graph), + input_impl_type(src), + fOutput_type(src.my_graph) { + fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + using input_impl_type::try_put_task; + + broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } + + void reset_node(reset_flags f) override { + input_impl_type::reset_function_input(f); + // TODO: use clear() instead. + if(f & rf_clear_edges) { + successors().clear(); + my_predecessors.clear(); + } + __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "function_node successors not empty"); + __TBB_ASSERT(this->my_predecessors.empty(), "function_node predecessors not empty"); + } + +}; // class function_node + +//! implements a function node that supports Input -> (set of outputs) +// Output is a tuple of output types. 
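// A hedged usage sketch (the node name, lambda, and routing rule below are illustrative
// assumptions): a multifunction_node receives one input and may put to any subset of its
// output ports from inside the body.
//
//     using router_t = tbb::flow::multifunction_node<int, std::tuple<int, int>>;
//     tbb::flow::graph g;
//     router_t router(g, tbb::flow::unlimited,
//         [](const int& v, router_t::output_ports_type& ports) {
//             if (v % 2 == 0) std::get<0>(ports).try_put(v);   // evens to port 0
//             else            std::get<1>(ports).try_put(v);   // odds  to port 1
//         });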
+template<typename Input, typename Output, typename Policy = queueing> +class multifunction_node : + public graph_node, + public multifunction_input + < + Input, + typename wrap_tuple_elements< + std::tuple_size<Output>::value, // #elements in tuple + multifunction_output, // wrap this around each element + Output // the tuple providing the types + >::type, + Policy, + cache_aligned_allocator<Input> + > +{ + typedef cache_aligned_allocator<Input> internals_allocator; + +protected: + static const int N = std::tuple_size<Output>::value; +public: + typedef Input input_type; + typedef null_type output_type; + typedef typename wrap_tuple_elements<N,multifunction_output, Output>::type output_ports_type; + typedef multifunction_input< + input_type, output_ports_type, Policy, internals_allocator> input_impl_type; + typedef function_input_queue<input_type, internals_allocator> input_queue_type; +private: + using input_impl_type::my_predecessors; +public: + template<typename Body> + __TBB_NOINLINE_SYM multifunction_node( + graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g), input_impl_type(g, concurrency, body, a_priority) { + fgt_multioutput_node_with_body<N>( + CODEPTR(), FLOW_MULTIFUNCTION_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body + ); + } + + template <typename Body> + __TBB_NOINLINE_SYM multifunction_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) + : multifunction_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority) + : multifunction_node(nodes.graph_reference(), concurrency, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) + : multifunction_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + __TBB_NOINLINE_SYM multifunction_node( const multifunction_node &other) : + graph_node(other.my_graph), input_impl_type(other) { + fgt_multioutput_node_with_body<N>( CODEPTR(), FLOW_MULTIFUNCTION_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body ); + } + + // all the guts are in multifunction_input... +protected: + void reset_node(reset_flags f) override { input_impl_type::reset(f); } +}; // multifunction_node + +//! split_node: accepts a tuple as input, forwards each element of the tuple to its +// successors. The node has unlimited concurrency, so it does not reject inputs. 
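// A hedged usage sketch (the graph and edge targets below are illustrative assumptions):
//
//     tbb::flow::graph g;
//     tbb::flow::split_node<std::tuple<int, float>> s(g);
//     tbb::flow::queue_node<int>   ints(g);
//     tbb::flow::queue_node<float> floats(g);
//     tbb::flow::make_edge(tbb::flow::output_port<0>(s), ints);
//     tbb::flow::make_edge(tbb::flow::output_port<1>(s), floats);
//     s.try_put(std::make_tuple(1, 2.0f));   // 1 goes to `ints`, 2.0f to `floats`
//     g.wait_for_all();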
+template<typename TupleType> +class split_node : public graph_node, public receiver<TupleType> { + static const int N = std::tuple_size<TupleType>::value; + typedef receiver<TupleType> base_type; +public: + typedef TupleType input_type; + typedef typename wrap_tuple_elements< + N, // #elements in tuple + multifunction_output, // wrap this around each element + TupleType // the tuple providing the types + >::type output_ports_type; + + __TBB_NOINLINE_SYM explicit split_node(graph &g) + : graph_node(g), + my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)) + { + fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), this->output_ports()); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM split_node(const node_set<Args...>& nodes) : split_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM split_node(const split_node& other) + : graph_node(other.my_graph), base_type(other), + my_output_ports(init_output_ports<output_ports_type>::call(other.my_graph, my_output_ports)) + { + fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), this->output_ports()); + } + + output_ports_type &output_ports() { return my_output_ports; } + +protected: + graph_task *try_put_task(const TupleType& t) override { + // Sending split messages in parallel is not justified, as overheads would prevail. + // Also, we do not have successors here. So we just tell the task returned here is successful. + return emit_element<N>::emit_this(this->my_graph, t, output_ports()); + } + void reset_node(reset_flags f) override { + if (f & rf_clear_edges) + clear_element<N>::clear_this(my_output_ports); + + __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "split_node reset failed"); + } + graph& graph_reference() const override { + return my_graph; + } + +private: + output_ports_type my_output_ports; +}; + +//! Implements an executable node that supports continue_msg -> Output +template <typename Output, typename Policy = Policy<void> > +class continue_node : public graph_node, public continue_input<Output, Policy>, + public function_output<Output> { +public: + typedef continue_msg input_type; + typedef Output output_type; + typedef continue_input<Output, Policy> input_impl_type; + typedef function_output<output_type> fOutput_type; + typedef typename input_impl_type::predecessor_type predecessor_type; + typedef typename fOutput_type::successor_type successor_type; + + //! Constructor for executable node with continue_msg -> Output + template <typename Body > + __TBB_NOINLINE_SYM continue_node( + graph &g, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g), input_impl_type( g, body, a_priority ), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + continue_node( graph& g, Body body, node_priority_t a_priority ) + : continue_node(g, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... 
Args> + continue_node( const node_set<Args...>& nodes, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority ) + : continue_node(nodes.graph_reference(), body, p, a_priority ) { + make_edges_in_order(nodes, *this); + } + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, Body body, node_priority_t a_priority) + : continue_node(nodes, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + //! Constructor for executable node with continue_msg -> Output + template <typename Body > + __TBB_NOINLINE_SYM continue_node( + graph &g, int number_of_predecessors, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g) + , input_impl_type(g, number_of_predecessors, body, a_priority), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + continue_node( graph& g, int number_of_predecessors, Body body, node_priority_t a_priority) + : continue_node(g, number_of_predecessors, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, int number_of_predecessors, + Body body, Policy p = Policy(), node_priority_t a_priority = no_priority ) + : continue_node(nodes.graph_reference(), number_of_predecessors, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, int number_of_predecessors, + Body body, node_priority_t a_priority ) + : continue_node(nodes, number_of_predecessors, body, Policy(), a_priority) {} +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM continue_node( const continue_node& src ) : + graph_node(src.my_graph), input_impl_type(src), + function_output<Output>(src.my_graph) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + using input_impl_type::try_put_task; + broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } + + void reset_node(reset_flags f) override { + input_impl_type::reset_receiver(f); + if(f & rf_clear_edges)successors().clear(); + __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "continue_node not reset"); + } +}; // continue_node + +//! Forwards messages of type T to all successors +template <typename T> +class broadcast_node : public graph_node, public receiver<T>, public sender<T> { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; +private: + broadcast_cache<input_type> my_successors; +public: + + __TBB_NOINLINE_SYM explicit broadcast_node(graph& g) : graph_node(g), my_successors(this) { + fgt_node( CODEPTR(), FLOW_BROADCAST_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + broadcast_node(const node_set<Args...>& nodes) : broadcast_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM broadcast_node( const broadcast_node& src ) : broadcast_node(src.my_graph) {} + + //! Adds a successor + bool register_successor( successor_type &r ) override { + my_successors.register_successor( r ); + return true; + } + + //! Removes s as a successor + bool remove_successor( successor_type &r ) override { + my_successors.remove_successor( r ); + return true; + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! build a task to run the successor if possible. Default is old behavior. + graph_task *try_put_task(const T& t) override { + graph_task *new_task = my_successors.try_put_task(t); + if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; + return new_task; + } + + graph& graph_reference() const override { + return my_graph; + } + + void reset_node(reset_flags f) override { + if (f&rf_clear_edges) { + my_successors.clear(); + } + __TBB_ASSERT(!(f & rf_clear_edges) || my_successors.empty(), "Error resetting broadcast_node"); + } +}; // broadcast_node + +//! Forwards messages in arbitrary order +template <typename T> +class buffer_node + : public graph_node + , public reservable_item_buffer< T, cache_aligned_allocator<T> > + , public receiver<T>, public sender<T> +{ + typedef cache_aligned_allocator<T> internals_allocator; + +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + typedef buffer_node<T> class_type; + +protected: + typedef size_t size_type; + round_robin_cache< T, null_rw_mutex > my_successors; + + friend class forward_task_bypass< class_type >; + + enum op_type {reg_succ, rem_succ, req_item, res_item, rel_res, con_res, put_item, try_fwd_task + }; + + // implements the aggregator_operation concept + class buffer_operation : public aggregated_operation< buffer_operation > { + public: + char type; + T* elem; + graph_task* ltask; + successor_type *r; + + buffer_operation(const T& e, op_type t) : type(char(t)) + , elem(const_cast<T*>(&e)) , ltask(NULL) + {} + buffer_operation(op_type t) : type(char(t)), ltask(NULL) {} + }; + + bool forwarder_busy; + typedef aggregating_functor<class_type, buffer_operation> handler_type; + friend class aggregating_functor<class_type, buffer_operation>; + aggregator< handler_type, buffer_operation> my_aggregator; + + virtual void handle_operations(buffer_operation *op_list) { + handle_operations_impl(op_list, this); + } + + template<typename derived_type> + void handle_operations_impl(buffer_operation *op_list, derived_type* derived) { + __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); + + buffer_operation *tmp = NULL; + bool try_forwarding = false; + while (op_list) { + tmp = op_list; + op_list = op_list->next; + switch (tmp->type) { + case reg_succ: internal_reg_succ(tmp); try_forwarding = true; break; + case rem_succ: internal_rem_succ(tmp); break; + case req_item: internal_pop(tmp); break; + case res_item: internal_reserve(tmp); break; + case rel_res: internal_release(tmp); try_forwarding = true; break; + case con_res: internal_consume(tmp); try_forwarding = true; break; + case put_item: 
try_forwarding = internal_push(tmp); break; + case try_fwd_task: internal_forward_task(tmp); break; + } + } + + derived->order(); + + if (try_forwarding && !forwarder_busy) { + if(is_graph_active(this->my_graph)) { + forwarder_busy = true; + typedef forward_task_bypass<class_type> task_type; + small_object_allocator allocator{}; + graph_task* new_task = allocator.new_object<task_type>(graph_reference(), allocator, *this); + my_graph.reserve_wait(); + // tmp should point to the last item handled by the aggregator. This is the operation + // the handling thread enqueued. So modifying that record will be okay. + // TODO revamp: check that the issue is still present + // workaround for icc bug (at least 12.0 and 13.0) + // error: function "tbb::flow::interfaceX::combine_tasks" cannot be called with the given argument list + // argument types are: (graph, graph_task *, graph_task *) + graph_task *z = tmp->ltask; + graph &g = this->my_graph; + tmp->ltask = combine_tasks(g, z, new_task); // in case the op generated a task + } + } + } // handle_operations + + inline graph_task *grab_forwarding_task( buffer_operation &op_data) { + return op_data.ltask; + } + + inline bool enqueue_forwarding_task(buffer_operation &op_data) { + graph_task *ft = grab_forwarding_task(op_data); + if(ft) { + spawn_in_graph_arena(graph_reference(), *ft); + return true; + } + return false; + } + + //! This is executed by an enqueued task, the "forwarder" + virtual graph_task *forward_task() { + buffer_operation op_data(try_fwd_task); + graph_task *last_task = NULL; + do { + op_data.status = WAIT; + op_data.ltask = NULL; + my_aggregator.execute(&op_data); + + // workaround for icc bug + graph_task *xtask = op_data.ltask; + graph& g = this->my_graph; + last_task = combine_tasks(g, last_task, xtask); + } while (op_data.status ==SUCCEEDED); + return last_task; + } + + //! Register successor + virtual void internal_reg_succ(buffer_operation *op) { + my_successors.register_successor(*(op->r)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + + //! Remove successor + virtual void internal_rem_succ(buffer_operation *op) { + my_successors.remove_successor(*(op->r)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +private: + void order() {} + + bool is_item_valid() { + return this->my_item_valid(this->my_tail - 1); + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task *new_task = my_successors.try_put_task(this->back()); + if (new_task) { + // workaround for icc bug + graph& g = this->my_graph; + last_task = combine_tasks(g, last_task, new_task); + this->destroy_back(); + } + } + +protected: + //! 
Tries to forward valid items to successors + virtual void internal_forward_task(buffer_operation *op) { + internal_forward_task_impl(op, this); + } + + template<typename derived_type> + void internal_forward_task_impl(buffer_operation *op, derived_type* derived) { + __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); + + if (this->my_reserved || !derived->is_item_valid()) { + op->status.store(FAILED, std::memory_order_release); + this->forwarder_busy = false; + return; + } + // Try forwarding, giving each successor a chance + graph_task* last_task = NULL; + size_type counter = my_successors.size(); + for (; counter > 0 && derived->is_item_valid(); --counter) + derived->try_put_and_add_task(last_task); + + op->ltask = last_task; // return task + if (last_task && !counter) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + forwarder_busy = false; + } + } + + virtual bool internal_push(buffer_operation *op) { + this->push_back(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + return true; + } + + virtual void internal_pop(buffer_operation *op) { + if(this->pop_back(*(op->elem))) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + } + } + + virtual void internal_reserve(buffer_operation *op) { + if(this->reserve_front(*(op->elem))) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + } + } + + virtual void internal_consume(buffer_operation *op) { + this->consume_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + + virtual void internal_release(buffer_operation *op) { + this->release_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +public: + //! Constructor + __TBB_NOINLINE_SYM explicit buffer_node( graph &g ) + : graph_node(g), reservable_item_buffer<T, internals_allocator>(), receiver<T>(), + sender<T>(), my_successors(this), forwarder_busy(false) + { + my_aggregator.initialize_handler(handler_type(this)); + fgt_node( CODEPTR(), FLOW_BUFFER_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + buffer_node(const node_set<Args...>& nodes) : buffer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM buffer_node( const buffer_node& src ) : buffer_node(src.my_graph) {} + + // + // message sender implementation + // + + //! Adds a new successor. + /** Adds successor r to the list of successors; may forward tasks. */ + bool register_successor( successor_type &r ) override { + buffer_operation op_data(reg_succ); + op_data.r = &r; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Removes a successor. + /** Removes successor r from the list of successors. + It also calls r.remove_predecessor(*this) to remove this node as a predecessor. 
*/ + bool remove_successor( successor_type &r ) override { + // TODO revamp: investigate why full qualification is necessary here + tbb::detail::d1::remove_predecessor(r, *this); + buffer_operation op_data(rem_succ); + op_data.r = &r; + my_aggregator.execute(&op_data); + // even though this operation does not cause a forward, if we are the handler, and + // a forward is scheduled, we may be the first to reach this point after the aggregator, + // and so should check for the task. + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Request an item from the buffer_node + /** true = v contains the returned item<BR> + false = no item has been returned */ + bool try_get( T &v ) override { + buffer_operation op_data(req_item); + op_data.elem = &v; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return (op_data.status==SUCCEEDED); + } + + //! Reserves an item. + /** false = no item can be reserved<BR> + true = an item is reserved */ + bool try_reserve( T &v ) override { + buffer_operation op_data(res_item); + op_data.elem = &v; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return (op_data.status==SUCCEEDED); + } + + //! Release a reserved item. + /** true = item has been released and so remains in sender */ + bool try_release() override { + buffer_operation op_data(rel_res); + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Consumes a reserved item. + /** true = item is removed from sender and reservation removed */ + bool try_consume() override { + buffer_operation op_data(con_res); + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! receive an item, return a task *if possible + graph_task *try_put_task(const T &t) override { + buffer_operation op_data(t, put_item); + my_aggregator.execute(&op_data); + graph_task *ft = grab_forwarding_task(op_data); + // sequencer_nodes can return failure (if an item has been previously inserted) + // We have to spawn the returned task if our own operation fails. + + if(ft && op_data.status ==FAILED) { + // we haven't succeeded queueing the item, but for some reason the + // call returned a task (if another request resulted in a successful + // forward this could happen.) Queue the task and reset the pointer. + spawn_in_graph_arena(graph_reference(), *ft); ft = NULL; + } + else if(!ft && op_data.status ==SUCCEEDED) { + ft = SUCCESSFULLY_ENQUEUED; + } + return ft; + } + + graph& graph_reference() const override { + return my_graph; + } + +protected: + void reset_node( reset_flags f) override { + reservable_item_buffer<T, internals_allocator>::reset(); + // TODO: just clear structures + if (f&rf_clear_edges) { + my_successors.clear(); + } + forwarder_busy = false; + } +}; // buffer_node + +//! 
Forwards messages in FIFO order +template <typename T> +class queue_node : public buffer_node<T> { +protected: + typedef buffer_node<T> base_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::buffer_operation queue_operation; + typedef queue_node class_type; + +private: + template<typename> friend class buffer_node; + + bool is_item_valid() { + return this->my_item_valid(this->my_head); + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task *new_task = this->my_successors.try_put_task(this->front()); + if (new_task) { + // workaround for icc bug + graph& graph_ref = this->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); + this->destroy_front(); + } + } + +protected: + void internal_forward_task(queue_operation *op) override { + this->internal_forward_task_impl(op, this); + } + + void internal_pop(queue_operation *op) override { + if ( this->my_reserved || !this->my_item_valid(this->my_head)){ + op->status.store(FAILED, std::memory_order_release); + } + else { + this->pop_front(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + } + void internal_reserve(queue_operation *op) override { + if (this->my_reserved || !this->my_item_valid(this->my_head)) { + op->status.store(FAILED, std::memory_order_release); + } + else { + this->reserve_front(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + } + void internal_consume(queue_operation *op) override { + this->consume_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit queue_node( graph &g ) : base_type(g) { + fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + queue_node( const node_set<Args...>& nodes) : queue_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM queue_node( const queue_node& src) : base_type(src) { + fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + + +protected: + void reset_node( reset_flags f) override { + base_type::reset_node(f); + } +}; // queue_node + +//! Forwards messages in sequence order +template <typename T> +class sequencer_node : public queue_node<T> { + function_body< T, size_t > *my_sequencer; + // my_sequencer should be a benign function and must be callable + // from a parallel context. Does this mean it needn't be reset? +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! 
Constructor + template< typename Sequencer > + __TBB_NOINLINE_SYM sequencer_node( graph &g, const Sequencer& s ) : queue_node<T>(g), + my_sequencer(new function_body_leaf< T, size_t, Sequencer>(s) ) { + fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Sequencer, typename... Args> + sequencer_node( const node_set<Args...>& nodes, const Sequencer& s) + : sequencer_node(nodes.graph_reference(), s) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM sequencer_node( const sequencer_node& src ) : queue_node<T>(src), + my_sequencer( src.my_sequencer->clone() ) { + fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + + //! Destructor + ~sequencer_node() { delete my_sequencer; } + +protected: + typedef typename buffer_node<T>::size_type size_type; + typedef typename buffer_node<T>::buffer_operation sequencer_operation; + +private: + bool internal_push(sequencer_operation *op) override { + size_type tag = (*my_sequencer)(*(op->elem)); +#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES + if (tag < this->my_head) { + // have already emitted a message with this tag + op->status.store(FAILED, std::memory_order_release); + return false; + } +#endif + // cannot modify this->my_tail now; the buffer would be inconsistent. + size_t new_tail = (tag+1 > this->my_tail) ? tag+1 : this->my_tail; + + if (this->size(new_tail) > this->capacity()) { + this->grow_my_array(this->size(new_tail)); + } + this->my_tail = new_tail; + + const op_stat res = this->place_item(tag, *(op->elem)) ? SUCCEEDED : FAILED; + op->status.store(res, std::memory_order_release); + return res ==SUCCEEDED; + } +}; // sequencer_node + +//! Forwards messages in priority order +template<typename T, typename Compare = std::less<T>> +class priority_queue_node : public buffer_node<T> { +public: + typedef T input_type; + typedef T output_type; + typedef buffer_node<T> base_type; + typedef priority_queue_node class_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit priority_queue_node( graph &g, const Compare& comp = Compare() ) + : buffer_node<T>(g), compare(comp), mark(0) { + fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + priority_queue_node(const node_set<Args...>& nodes, const Compare& comp = Compare()) + : priority_queue_node(nodes.graph_reference(), comp) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM priority_queue_node( const priority_queue_node &src ) + : buffer_node<T>(src), mark(0) + { + fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +protected: + + void reset_node( reset_flags f) override { + mark = 0; + base_type::reset_node(f); + } + + typedef typename buffer_node<T>::size_type size_type; + typedef typename buffer_node<T>::item_type item_type; + typedef typename buffer_node<T>::buffer_operation prio_operation; + + //! 
Tries to forward valid items to successors + void internal_forward_task(prio_operation *op) override { + this->internal_forward_task_impl(op, this); + } + + void handle_operations(prio_operation *op_list) override { + this->handle_operations_impl(op_list, this); + } + + bool internal_push(prio_operation *op) override { + prio_push(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + return true; + } + + void internal_pop(prio_operation *op) override { + // if empty or already reserved, don't pop + if ( this->my_reserved == true || this->my_tail == 0 ) { + op->status.store(FAILED, std::memory_order_release); + return; + } + + *(op->elem) = prio(); + op->status.store(SUCCEEDED, std::memory_order_release); + prio_pop(); + + } + + // pops the highest-priority item, saves copy + void internal_reserve(prio_operation *op) override { + if (this->my_reserved == true || this->my_tail == 0) { + op->status.store(FAILED, std::memory_order_release); + return; + } + this->my_reserved = true; + *(op->elem) = prio(); + reserved_item = *(op->elem); + op->status.store(SUCCEEDED, std::memory_order_release); + prio_pop(); + } + + void internal_consume(prio_operation *op) override { + op->status.store(SUCCEEDED, std::memory_order_release); + this->my_reserved = false; + reserved_item = input_type(); + } + + void internal_release(prio_operation *op) override { + op->status.store(SUCCEEDED, std::memory_order_release); + prio_push(reserved_item); + this->my_reserved = false; + reserved_item = input_type(); + } + +private: + template<typename> friend class buffer_node; + + void order() { + if (mark < this->my_tail) heapify(); + __TBB_ASSERT(mark == this->my_tail, "mark unequal after heapify"); + } + + bool is_item_valid() { + return this->my_tail > 0; + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task * new_task = this->my_successors.try_put_task(this->prio()); + if (new_task) { + // workaround for icc bug + graph& graph_ref = this->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); + prio_pop(); + } + } + +private: + Compare compare; + size_type mark; + + input_type reserved_item; + + // in case a reheap has not been done after a push, check if the mark item is higher than the 0'th item + bool prio_use_tail() { + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds before test"); + return mark < this->my_tail && compare(this->get_my_item(0), this->get_my_item(this->my_tail - 1)); + } + + // prio_push: checks that the item will fit, expand array if necessary, put at end + void prio_push(const T &src) { + if ( this->my_tail >= this->my_array_size ) + this->grow_my_array( this->my_tail + 1 ); + (void) this->place_item(this->my_tail, src); + ++(this->my_tail); + __TBB_ASSERT(mark < this->my_tail, "mark outside bounds after push"); + } + + // prio_pop: deletes highest priority item from the array, and if it is item + // 0, move last item to 0 and reheap. If end of array, just destroy and decrement tail + // and mark. Assumes the array has already been tested for emptiness; no failure. 
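    // Descriptive note on the buffer layout used by prio_push/prio_pop/heapify below: items at
    // indices [0, mark) form a binary heap ordered by Compare with the highest-priority element
    // at index 0, while items at [mark, my_tail) were pushed since the last heapify() and are
    // still unordered.  order() re-heapifies before forwarding, and prio()/prio_pop() consult
    // prio_use_tail() so that the most recently pushed item, if it outranks the current root,
    // is served without waiting for a reheap.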
+ void prio_pop() { + if (prio_use_tail()) { + // there are newly pushed elements; last one higher than top + // copy the data + this->destroy_item(this->my_tail-1); + --(this->my_tail); + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); + return; + } + this->destroy_item(0); + if(this->my_tail > 1) { + // push the last element down heap + __TBB_ASSERT(this->my_item_valid(this->my_tail - 1), NULL); + this->move_item(0,this->my_tail - 1); + } + --(this->my_tail); + if(mark > this->my_tail) --mark; + if (this->my_tail > 1) // don't reheap for heap of size 1 + reheap(); + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); + } + + const T& prio() { + return this->get_my_item(prio_use_tail() ? this->my_tail-1 : 0); + } + + // turn array into heap + void heapify() { + if(this->my_tail == 0) { + mark = 0; + return; + } + if (!mark) mark = 1; + for (; mark<this->my_tail; ++mark) { // for each unheaped element + size_type cur_pos = mark; + input_type to_place; + this->fetch_item(mark,to_place); + do { // push to_place up the heap + size_type parent = (cur_pos-1)>>1; + if (!compare(this->get_my_item(parent), to_place)) + break; + this->move_item(cur_pos, parent); + cur_pos = parent; + } while( cur_pos ); + (void) this->place_item(cur_pos, to_place); + } + } + + // otherwise heapified array with new root element; rearrange to heap + void reheap() { + size_type cur_pos=0, child=1; + while (child < mark) { + size_type target = child; + if (child+1<mark && + compare(this->get_my_item(child), + this->get_my_item(child+1))) + ++target; + // target now has the higher priority child + if (compare(this->get_my_item(target), + this->get_my_item(cur_pos))) + break; + // swap + this->swap_items(cur_pos, target); + cur_pos = target; + child = (cur_pos<<1)+1; + } + } +}; // priority_queue_node + +//! Forwards messages only if the threshold has not been reached +/** This node forwards items until its threshold is reached. + It contains no buffering. If the downstream node rejects, the + message is dropped. */ +template< typename T, typename DecrementType=continue_msg > +class limiter_node : public graph_node, public receiver< T >, public sender< T > { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + //TODO: There is a lack of predefined types for its controlling "decrementer" port. It should be fixed later. + +private: + size_t my_threshold; + size_t my_count; // number of successful puts + size_t my_tries; // number of active put attempts + reservable_predecessor_cache< T, spin_mutex > my_predecessors; + spin_mutex my_mutex; + broadcast_cache< T > my_successors; + + //! 
The internal receiver< DecrementType > that adjusts the count + threshold_regulator< limiter_node<T, DecrementType>, DecrementType > decrement; + + graph_task* decrement_counter( long long delta ) { + { + spin_mutex::scoped_lock lock(my_mutex); + if( delta > 0 && size_t(delta) > my_count ) + my_count = 0; + else if( delta < 0 && size_t(delta) > my_threshold - my_count ) + my_count = my_threshold; + else + my_count -= size_t(delta); // absolute value of delta is sufficiently small + } + return forward_task(); + } + + // Let threshold_regulator call decrement_counter() + friend class threshold_regulator< limiter_node<T, DecrementType>, DecrementType >; + + friend class forward_task_bypass< limiter_node<T,DecrementType> >; + + bool check_conditions() { // always called under lock + return ( my_count + my_tries < my_threshold && !my_predecessors.empty() && !my_successors.empty() ); + } + + // only returns a valid task pointer or NULL, never SUCCESSFULLY_ENQUEUED + graph_task* forward_task() { + input_type v; + graph_task* rval = NULL; + bool reserved = false; + { + spin_mutex::scoped_lock lock(my_mutex); + if ( check_conditions() ) + ++my_tries; + else + return NULL; + } + + //SUCCESS + // if we can reserve and can put, we consume the reservation + // we increment the count and decrement the tries + if ( (my_predecessors.try_reserve(v)) == true ){ + reserved=true; + if ( (rval = my_successors.try_put_task(v)) != NULL ){ + { + spin_mutex::scoped_lock lock(my_mutex); + ++my_count; + --my_tries; + my_predecessors.try_consume(); + if ( check_conditions() ) { + if ( is_graph_active(this->my_graph) ) { + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + small_object_allocator allocator{}; + graph_task* rtask = allocator.new_object<task_type>( my_graph, allocator, *this ); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *rtask); + } + } + } + return rval; + } + } + //FAILURE + //if we can't reserve, we decrement the tries + //if we can reserve but can't put, we decrement the tries and release the reservation + { + spin_mutex::scoped_lock lock(my_mutex); + --my_tries; + if (reserved) my_predecessors.try_release(); + if ( check_conditions() ) { + if ( is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + __TBB_ASSERT(!rval, "Have two tasks to handle"); + return t; + } + } + return rval; + } + } + + void initialize() { + fgt_node( + CODEPTR(), FLOW_LIMITER_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<receiver<DecrementType> *>(&decrement), + static_cast<sender<output_type> *>(this) + ); + } + +public: + //! Constructor + limiter_node(graph &g, size_t threshold) + : graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), my_predecessors(this) + , my_successors(this), decrement(this) + { + initialize(); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + limiter_node(const node_set<Args...>& nodes, size_t threshold) + : limiter_node(nodes.graph_reference(), threshold) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + limiter_node( const limiter_node& src ) : limiter_node(src.my_graph, src.my_threshold) {} + + //! The interface for accessing internal receiver< DecrementType > that adjusts the count + receiver<DecrementType>& decrementer() { return decrement; } + + //! 
Replace the current successor with this new successor + bool register_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + bool was_empty = my_successors.empty(); + my_successors.register_successor(r); + //spawn a forward task if this is the only successor + if ( was_empty && !my_predecessors.empty() && my_count + my_tries < my_threshold ) { + if ( is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *t); + } + } + return true; + } + + //! Removes a successor from this node + /** r.remove_predecessor(*this) is also called. */ + bool remove_successor( successor_type &r ) override { + // TODO revamp: investigate why qualification is needed for remove_predecessor() call + tbb::detail::d1::remove_predecessor(r, *this); + my_successors.remove_successor(r); + return true; + } + + //! Adds src to the list of cached predecessors. + bool register_predecessor( predecessor_type &src ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_predecessors.add( src ); + if ( my_count + my_tries < my_threshold && !my_successors.empty() && is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *t); + } + return true; + } + + //! Removes src from the list of cached predecessors. + bool remove_predecessor( predecessor_type &src ) override { + my_predecessors.remove( src ); + return true; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! Puts an item to this receiver + graph_task* try_put_task( const T &t ) override { + { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_count + my_tries >= my_threshold ) + return NULL; + else + ++my_tries; + } + + graph_task* rtask = my_successors.try_put_task(t); + + if ( !rtask ) { // try_put_task failed. 
+ spin_mutex::scoped_lock lock(my_mutex); + --my_tries; + if (check_conditions() && is_graph_active(this->my_graph)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + rtask = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + } + } + else { + spin_mutex::scoped_lock lock(my_mutex); + ++my_count; + --my_tries; + } + return rtask; + } + + graph& graph_reference() const override { return my_graph; } + + void reset_node( reset_flags f) override { + my_count = 0; + if(f & rf_clear_edges) { + my_predecessors.clear(); + my_successors.clear(); + } + else + { + my_predecessors.reset( ); + } + decrement.reset_receiver(f); + } +}; // limiter_node + +#include "detail/_flow_graph_join_impl.h" + +template<typename OutputTuple, typename JP=queueing> class join_node; + +template<typename OutputTuple> +class join_node<OutputTuple,reserving>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, reserving_port, OutputTuple, reserving> { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, reserving_port, OutputTuple, reserving> unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, reserving = reserving()) : join_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename OutputTuple> +class join_node<OutputTuple,queueing>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, queueing_port, OutputTuple, queueing> { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, queueing_port, OutputTuple, queueing> unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, queueing = queueing()) : join_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +// template for key_matching join_node +// tag_matching join_node is a specialization of key_matching, and is source-compatible. 
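The limiter_node restored above throttles message flow: it counts items in flight against its threshold, rejects puts that arrive while it is full, and pulls new work from reservable predecessors whenever its decrementer port is signalled. A minimal usage sketch of that public API, not a definitive program; it assumes the oneTBB headers and runtime are available, and the node layout and constants are purely illustrative:

    #include "oneapi/tbb/flow_graph.h"
    #include <iostream>

    int main() {
        using namespace tbb::flow;
        graph g;
        buffer_node<int> backlog(g);          // reservable predecessor the limiter can pull from
        limiter_node<int> limiter(g, 3);      // at most 3 items in flight at a time
        function_node<int, continue_msg> worker(g, serial, [](int v) {
            std::cout << "processing " << v << "\n";
            return continue_msg();            // signals completion of one item
        });
        make_edge(backlog, limiter);
        make_edge(limiter, worker);
        make_edge(worker, limiter.decrementer());  // each completion frees one slot
        for (int i = 0; i < 10; ++i)
            backlog.try_put(i);
        g.wait_for_all();
        return 0;
    }

The decrementer edge is what keeps such a pipeline moving: without it the limiter stops forwarding after the first three items, because nothing ever lowers its internal count.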
+template<typename OutputTuple, typename K, typename KHash> +class join_node<OutputTuple, key_matching<K, KHash> > : public unfolded_join_node<std::tuple_size<OutputTuple>::value, + key_matching_port, OutputTuple, key_matching<K,KHash> > { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, key_matching_port, OutputTuple, key_matching<K,KHash> > unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + join_node(graph &g) : unfolded_type(g) {} +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + + template<typename __TBB_B0, typename __TBB_B1> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1) : unfolded_type(g, b0, b1) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2) : unfolded_type(g, b0, b1, b2) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3) : unfolded_type(g, b0, b1, b2, b3) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4) : + unfolded_type(g, b0, b1, b2, b3, b4) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#if __TBB_VARIADIC_MAX >= 6 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5) : + unfolded_type(g, b0, b1, b2, b3, b4, b5) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 7 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6) : + unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 8 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { + fgt_multiinput_node<N>( CODEPTR(), 
FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 9 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7, __TBB_B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 10 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8, typename __TBB_B9> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7, __TBB_B8 b8, __TBB_B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template < +#if (__clang_major__ == 3 && __clang_minor__ == 4) + // clang 3.4 misdeduces 'Args...' for 'node_set' while it can cope with template template parameter. + template<typename...> class node_set, +#endif + typename... Args, typename... Bodies + > + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, Bodies... bodies) + : join_node(nodes.graph_reference(), bodies...) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +// indexer node +#include "detail/_flow_graph_indexer_impl.h" + +// TODO: Implement interface with variadic template or tuple +template<typename T0, typename T1=null_type, typename T2=null_type, typename T3=null_type, + typename T4=null_type, typename T5=null_type, typename T6=null_type, + typename T7=null_type, typename T8=null_type, typename T9=null_type> class indexer_node; + +//indexer node specializations +template<typename T0> +class indexer_node<T0> : public unfolded_indexer_node<std::tuple<T0> > { +private: + static const int N = 1; +public: + typedef std::tuple<T0> InputTuple; + typedef tagged_msg<size_t, T0> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +}; + +template<typename T0, typename T1> +class indexer_node<T0, T1> : public unfolded_indexer_node<std::tuple<T0, T1> > { +private: + static const int N = 2; +public: + typedef std::tuple<T0, T1> InputTuple; + typedef tagged_msg<size_t, T0, T1> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2> +class indexer_node<T0, T1, T2> : public unfolded_indexer_node<std::tuple<T0, T1, T2> > { +private: + static const int N = 3; +public: + typedef std::tuple<T0, T1, T2> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2, typename T3> +class indexer_node<T0, T1, T2, T3> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3> > { +private: + static const int N = 4; +public: + typedef std::tuple<T0, T1, T2, T3> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2, typename T3, typename T4> +class indexer_node<T0, T1, T2, T3, T4> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4> > { +private: + static const int N = 5; +public: + typedef std::tuple<T0, T1, T2, T3, T4> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +#if __TBB_VARIADIC_MAX >= 6 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> +class indexer_node<T0, T1, T2, T3, T4, T5> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5> > { +private: + static const int N = 6; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 6 + +#if __TBB_VARIADIC_MAX >= 7 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +class indexer_node<T0, T1, T2, T3, T4, T5, T6> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6> > { +private: + static const int N = 7; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 7 + +#if __TBB_VARIADIC_MAX >= 8 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> > { +private: + static const int N = 8; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 8 + +#if __TBB_VARIADIC_MAX >= 9 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7, T8> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> > { +private: + static const int N = 9; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 9 + +#if __TBB_VARIADIC_MAX >= 10 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +class indexer_node/*default*/ : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> > { +private: + static const int N = 10; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 10 + +template< typename T > +inline void internal_make_edge( sender<T> &p, receiver<T> &s ) { + register_successor(p, s); + fgt_make_edge( &p, &s ); +} + +//! Makes an edge between a single predecessor and a single successor +template< typename T > +inline void make_edge( sender<T> &p, receiver<T> &s ) { + internal_make_edge( p, s ); +} + +//Makes an edge from port 0 of a multi-output predecessor to port 0 of a multi-input successor. +template< typename T, typename V, + typename = typename T::output_ports_type, typename = typename V::input_ports_type > +inline void make_edge( T& output, V& input) { + make_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); +} + +//Makes an edge from port 0 of a multi-output predecessor to a receiver. +template< typename T, typename R, + typename = typename T::output_ports_type > +inline void make_edge( T& output, receiver<R>& input) { + make_edge(std::get<0>(output.output_ports()), input); +} + +//Makes an edge from a sender to port 0 of a multi-input successor. +template< typename S, typename V, + typename = typename V::input_ports_type > +inline void make_edge( sender<S>& output, V& input) { + make_edge(output, std::get<0>(input.input_ports())); +} + +template< typename T > +inline void internal_remove_edge( sender<T> &p, receiver<T> &s ) { + remove_successor( p, s ); + fgt_remove_edge( &p, &s ); +} + +//! Removes an edge between a single predecessor and a single successor +template< typename T > +inline void remove_edge( sender<T> &p, receiver<T> &s ) { + internal_remove_edge( p, s ); +} + +//Removes an edge between port 0 of a multi-output predecessor and port 0 of a multi-input successor. 
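The indexer_node specializations above tag each incoming message with the index of the port it arrived on, and the make_edge / remove_edge helpers connect single senders and receivers as well as port 0 of multi-port nodes. A minimal sketch of how they combine, assuming the oneTBB headers and runtime; the concrete types and values are illustrative only:

    #include "oneapi/tbb/flow_graph.h"
    #include <iostream>

    int main() {
        using namespace tbb::flow;
        graph g;
        indexer_node<int, float> idx(g);
        using msg_t = indexer_node<int, float>::output_type;   // tagged_msg<size_t, int, float>
        function_node<msg_t> sink(g, unlimited, [](const msg_t& m) {
            if (is_a<int>(m))
                std::cout << "int arrived: " << cast_to<int>(m) << "\n";
            else
                std::cout << "float arrived: " << cast_to<float>(m) << "\n";
        });
        make_edge(idx, sink);             // the indexer's single output feeds the sink
        input_port<0>(idx).try_put(3);
        input_port<1>(idx).try_put(7.5f);
        g.wait_for_all();
        remove_edge(idx, sink);           // edges are removed with the matching overload
        return 0;
    }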
+template< typename T, typename V, + typename = typename T::output_ports_type, typename = typename V::input_ports_type > +inline void remove_edge( T& output, V& input) { + remove_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); +} + +//Removes an edge between port 0 of a multi-output predecessor and a receiver. +template< typename T, typename R, + typename = typename T::output_ports_type > +inline void remove_edge( T& output, receiver<R>& input) { + remove_edge(std::get<0>(output.output_ports()), input); +} +//Removes an edge between a sender and port 0 of a multi-input successor. +template< typename S, typename V, + typename = typename V::input_ports_type > +inline void remove_edge( sender<S>& output, V& input) { + remove_edge(output, std::get<0>(input.input_ports())); +} + +//! Returns a copy of the body from a function or continue node +template< typename Body, typename Node > +Body copy_body( Node &n ) { + return n.template copy_function_object<Body>(); +} + +//composite_node +template< typename InputTuple, typename OutputTuple > class composite_node; + +template< typename... InputTypes, typename... OutputTypes> +class composite_node <std::tuple<InputTypes...>, std::tuple<OutputTypes...> > : public graph_node { + +public: + typedef std::tuple< receiver<InputTypes>&... > input_ports_type; + typedef std::tuple< sender<OutputTypes>&... > output_ports_type; + +private: + std::unique_ptr<input_ports_type> my_input_ports; + std::unique_ptr<output_ports_type> my_output_ports; + + static const size_t NUM_INPUTS = sizeof...(InputTypes); + static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + composite_node( graph &g ) : graph_node(g) { + fgt_multiinput_multioutput_node( CODEPTR(), FLOW_COMPOSITE_NODE, this, &this->my_graph ); + } + + template<typename T1, typename T2> + void set_external_ports(T1&& input_ports_tuple, T2&& output_ports_tuple) { + static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); + static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); + + fgt_internal_input_alias_helper<T1, NUM_INPUTS>::alias_port( this, input_ports_tuple); + fgt_internal_output_alias_helper<T2, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); + + my_input_ports.reset( new input_ports_type(std::forward<T1>(input_ports_tuple)) ); + my_output_ports.reset( new output_ports_type(std::forward<T2>(output_ports_tuple)) ); + } + + template< typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template< typename... NodeTypes > + void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + input_ports_type& input_ports() { + __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); + return *my_input_ports; + } + + output_ports_type& output_ports() { + __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); + return *my_output_ports; + } +}; // class composite_node + +//composite_node with only input ports +template< typename... InputTypes> +class composite_node <std::tuple<InputTypes...>, std::tuple<> > : public graph_node { +public: + typedef std::tuple< receiver<InputTypes>&... 
> input_ports_type; + +private: + std::unique_ptr<input_ports_type> my_input_ports; + static const size_t NUM_INPUTS = sizeof...(InputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + composite_node( graph &g ) : graph_node(g) { + fgt_composite( CODEPTR(), this, &g ); + } + + template<typename T> + void set_external_ports(T&& input_ports_tuple) { + static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); + + fgt_internal_input_alias_helper<T, NUM_INPUTS>::alias_port( this, input_ports_tuple); + + my_input_ports.reset( new input_ports_type(std::forward<T>(input_ports_tuple)) ); + } + + template< typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template< typename... NodeTypes > + void add_nodes( const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + input_ports_type& input_ports() { + __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); + return *my_input_ports; + } + +}; // class composite_node + +//composite_nodes with only output_ports +template<typename... OutputTypes> +class composite_node <std::tuple<>, std::tuple<OutputTypes...> > : public graph_node { +public: + typedef std::tuple< sender<OutputTypes>&... > output_ports_type; + +private: + std::unique_ptr<output_ports_type> my_output_ports; + static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + __TBB_NOINLINE_SYM composite_node( graph &g ) : graph_node(g) { + fgt_composite( CODEPTR(), this, &g ); + } + + template<typename T> + void set_external_ports(T&& output_ports_tuple) { + static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); + + fgt_internal_output_alias_helper<T, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); + + my_output_ports.reset( new output_ports_type(std::forward<T>(output_ports_tuple)) ); + } + + template<typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template<typename... NodeTypes > + void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + output_ports_type& output_ports() { + __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); + return *my_output_ports; + } + +}; // class composite_node + +template<typename Gateway> +class async_body_base: no_assign { +public: + typedef Gateway gateway_type; + + async_body_base(gateway_type *gateway): my_gateway(gateway) { } + void set_gateway(gateway_type *gateway) { + my_gateway = gateway; + } + +protected: + gateway_type *my_gateway; +}; + +template<typename Input, typename Ports, typename Gateway, typename Body> +class async_body: public async_body_base<Gateway> { +public: + typedef async_body_base<Gateway> base_type; + typedef Gateway gateway_type; + + async_body(const Body &body, gateway_type *gateway) + : base_type(gateway), my_body(body) { } + + void operator()( const Input &v, Ports & ) { + my_body(v, *this->my_gateway); + } + + Body get_body() { return my_body; } + +private: + Body my_body; +}; + +//! 
Implements async node +template < typename Input, typename Output, + typename Policy = queueing_lightweight > +class async_node + : public multifunction_node< Input, std::tuple< Output >, Policy >, public sender< Output > +{ + typedef multifunction_node< Input, std::tuple< Output >, Policy > base_type; + typedef multifunction_input< + Input, typename base_type::output_ports_type, Policy, cache_aligned_allocator<Input>> mfn_input_type; + +public: + typedef Input input_type; + typedef Output output_type; + typedef receiver<input_type> receiver_type; + typedef receiver<output_type> successor_type; + typedef sender<input_type> predecessor_type; + typedef receiver_gateway<output_type> gateway_type; + typedef async_body_base<gateway_type> async_body_base_type; + typedef typename base_type::output_ports_type output_ports_type; + +private: + class receiver_gateway_impl: public receiver_gateway<Output> { + public: + receiver_gateway_impl(async_node* node): my_node(node) {} + void reserve_wait() override { + fgt_async_reserve(static_cast<typename async_node::receiver_type *>(my_node), &my_node->my_graph); + my_node->my_graph.reserve_wait(); + } + + void release_wait() override { + async_node* n = my_node; + graph* g = &n->my_graph; + g->release_wait(); + fgt_async_commit(static_cast<typename async_node::receiver_type *>(n), g); + } + + //! Implements gateway_type::try_put for an external activity to submit a message to FG + bool try_put(const Output &i) override { + return my_node->try_put_impl(i); + } + + private: + async_node* my_node; + } my_gateway; + + //The substitute of 'this' for member construction, to prevent compiler warnings + async_node* self() { return this; } + + //! Implements gateway_type::try_put for an external activity to submit a message to FG + bool try_put_impl(const Output &i) { + multifunction_output<Output> &port_0 = output_port<0>(*this); + broadcast_cache<output_type>& port_successors = port_0.successors(); + fgt_async_try_put_begin(this, &port_0); + // TODO revamp: change to std::list<graph_task*> + graph_task_list tasks; + bool is_at_least_one_put_successful = port_successors.gather_successful_try_puts(i, tasks); + __TBB_ASSERT( is_at_least_one_put_successful || tasks.empty(), + "Return status is inconsistent with the method operation." ); + + while( !tasks.empty() ) { + enqueue_in_graph_arena(this->my_graph, tasks.pop_front()); + } + fgt_async_try_put_end(this, &port_0); + return is_at_least_one_put_successful; + } + +public: + template<typename Body> + __TBB_NOINLINE_SYM async_node( + graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : base_type( + g, concurrency, + async_body<Input, typename base_type::output_ports_type, gateway_type, Body> + (body, &my_gateway), a_priority ), my_gateway(self()) { + fgt_multioutput_node_with_body<1>( + CODEPTR(), FLOW_ASYNC_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body + ); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM async_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) + : async_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... 
Args> + __TBB_NOINLINE_SYM async_node( + const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy = Policy(), node_priority_t a_priority = no_priority ) + : async_node(nodes.graph_reference(), concurrency, body, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM async_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) + : async_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + __TBB_NOINLINE_SYM async_node( const async_node &other ) : base_type(other), sender<Output>(), my_gateway(self()) { + static_cast<async_body_base_type*>(this->my_body->get_body_ptr())->set_gateway(&my_gateway); + static_cast<async_body_base_type*>(this->my_init_body->get_body_ptr())->set_gateway(&my_gateway); + + fgt_multioutput_node_with_body<1>( CODEPTR(), FLOW_ASYNC_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body ); + } + + gateway_type& gateway() { + return my_gateway; + } + + // Define sender< Output > + + //! Add a new successor to this node + bool register_successor(successor_type&) override { + __TBB_ASSERT(false, "Successors must be registered only via ports"); + return false; + } + + //! Removes a successor from this node + bool remove_successor(successor_type&) override { + __TBB_ASSERT(false, "Successors must be removed only via ports"); + return false; + } + + template<typename Body> + Body copy_function_object() { + typedef multifunction_body<input_type, typename base_type::output_ports_type> mfn_body_type; + typedef async_body<Input, typename base_type::output_ports_type, gateway_type, Body> async_body_type; + mfn_body_type &body_ref = *this->my_body; + async_body_type ab = *static_cast<async_body_type*>(dynamic_cast< multifunction_body_leaf<input_type, typename base_type::output_ports_type, async_body_type> & >(body_ref).get_body_ptr()); + return ab.get_body(); + } + +protected: + + void reset_node( reset_flags f) override { + base_type::reset_node(f); + } +}; + +#include "detail/_flow_graph_node_set_impl.h" + +template< typename T > +class overwrite_node : public graph_node, public receiver<T>, public sender<T> { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + __TBB_NOINLINE_SYM explicit overwrite_node(graph &g) + : graph_node(g), my_successors(this), my_buffer_is_valid(false) + { + fgt_node( CODEPTR(), FLOW_OVERWRITE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + overwrite_node(const node_set<Args...>& nodes) : overwrite_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor; doesn't take anything from src; default won't work + __TBB_NOINLINE_SYM overwrite_node( const overwrite_node& src ) : overwrite_node(src.my_graph) {} + + ~overwrite_node() {} + + bool register_successor( successor_type &s ) override { + spin_mutex::scoped_lock l( my_mutex ); + if (my_buffer_is_valid && is_graph_active( my_graph )) { + // We have a valid value that must be forwarded immediately. 
+ bool ret = s.try_put( my_buffer ); + if ( ret ) { + // We add the successor that accepted our put + my_successors.register_successor( s ); + } else { + // In case of reservation a race between the moment of reservation and register_successor can appear, + // because failed reserve does not mean that register_successor is not ready to put a message immediately. + // We have some sort of infinite loop: reserving node tries to set pull state for the edge, + // but overwrite_node tries to return push state back. That is why we have to break this loop with task creation. + small_object_allocator allocator{}; + typedef register_predecessor_task task_type; + graph_task* t = allocator.new_object<task_type>(graph_reference(), allocator, *this, s); + graph_reference().reserve_wait(); + spawn_in_graph_arena( my_graph, *t ); + } + } else { + // No valid value yet, just add as successor + my_successors.register_successor( s ); + } + return true; + } + + bool remove_successor( successor_type &s ) override { + spin_mutex::scoped_lock l( my_mutex ); + my_successors.remove_successor(s); + return true; + } + + bool try_get( input_type &v ) override { + spin_mutex::scoped_lock l( my_mutex ); + if ( my_buffer_is_valid ) { + v = my_buffer; + return true; + } + return false; + } + + //! Reserves an item + bool try_reserve( T &v ) override { + return try_get(v); + } + + //! Releases the reserved item + bool try_release() override { return true; } + + //! Consumes the reserved item + bool try_consume() override { return true; } + + bool is_valid() { + spin_mutex::scoped_lock l( my_mutex ); + return my_buffer_is_valid; + } + + void clear() { + spin_mutex::scoped_lock l( my_mutex ); + my_buffer_is_valid = false; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task( const input_type &v ) override { + spin_mutex::scoped_lock l( my_mutex ); + return try_put_task_impl(v); + } + + graph_task * try_put_task_impl(const input_type &v) { + my_buffer = v; + my_buffer_is_valid = true; + graph_task* rtask = my_successors.try_put_task(v); + if (!rtask) rtask = SUCCESSFULLY_ENQUEUED; + return rtask; + } + + graph& graph_reference() const override { + return my_graph; + } + + //! 
Breaks an infinite loop between the node reservation and register_successor call + struct register_predecessor_task : public graph_task { + register_predecessor_task( + graph& g, small_object_allocator& allocator, predecessor_type& owner, successor_type& succ) + : graph_task(g, allocator), o(owner), s(succ) {}; + + task* execute(execution_data& ed) override { + // TODO revamp: investigate why qualification is needed for register_successor() call + using tbb::detail::d1::register_predecessor; + using tbb::detail::d1::register_successor; + if ( !register_predecessor(s, o) ) { + register_successor(o, s); + } + finalize(ed); + return nullptr; + } + + predecessor_type& o; + successor_type& s; + }; + + spin_mutex my_mutex; + broadcast_cache< input_type, null_rw_mutex > my_successors; + input_type my_buffer; + bool my_buffer_is_valid; + + void reset_node( reset_flags f) override { + my_buffer_is_valid = false; + if (f&rf_clear_edges) { + my_successors.clear(); + } + } +}; // overwrite_node + +template< typename T > +class write_once_node : public overwrite_node<T> { +public: + typedef T input_type; + typedef T output_type; + typedef overwrite_node<T> base_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit write_once_node(graph& g) : base_type(g) { + fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + write_once_node(const node_set<Args...>& nodes) : write_once_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor: call base class copy constructor + __TBB_NOINLINE_SYM write_once_node( const write_once_node& src ) : base_type(src) { + fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task *try_put_task( const T &v ) override { + spin_mutex::scoped_lock l( this->my_mutex ); + return this->my_buffer_is_valid ? 
NULL : this->try_put_task_impl(v); + } +}; // write_once_node + +inline void set_name(const graph& g, const char *name) { + fgt_graph_desc(&g, name); +} + +template <typename Output> +inline void set_name(const input_node<Output>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename Input, typename Output, typename Policy> +inline void set_name(const function_node<Input, Output, Policy>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename Output, typename Policy> +inline void set_name(const continue_node<Output,Policy>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const broadcast_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const buffer_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const queue_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const sequencer_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T, typename Compare> +inline void set_name(const priority_queue_node<T, Compare>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T, typename DecrementType> +inline void set_name(const limiter_node<T, DecrementType>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename OutputTuple, typename JP> +inline void set_name(const join_node<OutputTuple, JP>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename... Types> +inline void set_name(const indexer_node<Types...>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const overwrite_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const write_once_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template<typename Input, typename Output, typename Policy> +inline void set_name(const multifunction_node<Input, Output, Policy>& node, const char *name) { + fgt_multioutput_node_desc(&node, name); +} + +template<typename TupleType> +inline void set_name(const split_node<TupleType>& node, const char *name) { + fgt_multioutput_node_desc(&node, name); +} + +template< typename InputTuple, typename OutputTuple > +inline void set_name(const composite_node<InputTuple, OutputTuple>& node, const char *name) { + fgt_multiinput_multioutput_node_desc(&node, name); +} + +template<typename Input, typename Output, typename Policy> +inline void set_name(const async_node<Input, Output, Policy>& node, const char *name) +{ + fgt_multioutput_node_desc(&node, name); +} +} // d1 +} // detail +} // tbb + + +// Include deduction guides for node classes +#include "detail/_flow_graph_nodes_deduction.h" + +namespace tbb { +namespace flow { +inline namespace v1 { + using detail::d1::receiver; + using detail::d1::sender; + + using detail::d1::serial; + using detail::d1::unlimited; + + using detail::d1::reset_flags; + using detail::d1::rf_reset_protocol; + using detail::d1::rf_reset_bodies; + using detail::d1::rf_clear_edges; + + using detail::d1::graph; + using detail::d1::graph_node; + using detail::d1::continue_msg; + + using detail::d1::input_node; + using detail::d1::function_node; + using detail::d1::multifunction_node; + using detail::d1::split_node; + using detail::d1::output_port; + 
using detail::d1::indexer_node; + using detail::d1::tagged_msg; + using detail::d1::cast_to; + using detail::d1::is_a; + using detail::d1::continue_node; + using detail::d1::overwrite_node; + using detail::d1::write_once_node; + using detail::d1::broadcast_node; + using detail::d1::buffer_node; + using detail::d1::queue_node; + using detail::d1::sequencer_node; + using detail::d1::priority_queue_node; + using detail::d1::limiter_node; + using namespace detail::d1::graph_policy_namespace; + using detail::d1::join_node; + using detail::d1::input_port; + using detail::d1::copy_body; + using detail::d1::make_edge; + using detail::d1::remove_edge; + using detail::d1::tag_value; + using detail::d1::composite_node; + using detail::d1::async_node; + using detail::d1::node_priority_t; + using detail::d1::no_priority; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + using detail::d1::follows; + using detail::d1::precedes; + using detail::d1::make_node_set; + using detail::d1::make_edges; +#endif + +} // v1 +} // flow + + using detail::d1::flow_control; + +namespace profiling { + using detail::d1::set_name; +} // profiling + +} // tbb + + +#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) + // We don't do pragma pop here, since it still gives warning on the USER side + #undef __TBB_NOINLINE_SYM +#endif + +#endif // __TBB_flow_graph_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h b/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h index 121f167c4d..6ab5f7dbaf 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h +++ b/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h @@ -1,51 +1,51 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_flow_graph_abstractions_H -#define __TBB_flow_graph_abstractions_H - -namespace tbb { -namespace detail { -namespace d1 { - -//! Pure virtual template classes that define interfaces for async communication -class graph_proxy { -public: - //! Inform a graph that messages may come from outside, to prevent premature graph completion - virtual void reserve_wait() = 0; - - //! Inform a graph that a previous call to reserve_wait is no longer in effect - virtual void release_wait() = 0; - - virtual ~graph_proxy() {} -}; - -template <typename Input> -class receiver_gateway : public graph_proxy { -public: - //! Type of inputing data into FG. - typedef Input input_type; - - //! Submit signal from an asynchronous activity to FG. - virtual bool try_put(const input_type&) = 0; -}; - -} // d1 - - -} // detail -} // tbb -#endif +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_abstractions_H +#define __TBB_flow_graph_abstractions_H + +namespace tbb { +namespace detail { +namespace d1 { + +//! Pure virtual template classes that define interfaces for async communication +class graph_proxy { +public: + //! Inform a graph that messages may come from outside, to prevent premature graph completion + virtual void reserve_wait() = 0; + + //! Inform a graph that a previous call to reserve_wait is no longer in effect + virtual void release_wait() = 0; + + virtual ~graph_proxy() {} +}; + +template <typename Input> +class receiver_gateway : public graph_proxy { +public: + //! Type of inputing data into FG. + typedef Input input_type; + + //! Submit signal from an asynchronous activity to FG. + virtual bool try_put(const input_type&) = 0; +}; + +} // d1 + + +} // detail +} // tbb +#endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/global_control.h b/contrib/libs/tbb/include/oneapi/tbb/global_control.h index 80177b6b82..ee31fe23c0 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/global_control.h +++ b/contrib/libs/tbb/include/oneapi/tbb/global_control.h @@ -1,188 +1,188 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_global_control_H -#define __TBB_global_control_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_assert.h" -#include "detail/_template_helpers.h" -#include "detail/_exception.h" - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -#include <new> // std::nothrow_t -#endif -#include <cstddef> - -namespace tbb { -namespace detail { - -namespace d1 { -class global_control; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -class task_scheduler_handle; -#endif -} - -namespace r1 { -void __TBB_EXPORTED_FUNC create(d1::global_control&); -void __TBB_EXPORTED_FUNC destroy(d1::global_control&); -std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int); -struct global_control_impl; -struct control_storage_comparator; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -void release_impl(d1::task_scheduler_handle& handle); -bool finalize_impl(d1::task_scheduler_handle& handle); -void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&); -bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode); -#endif -} - -namespace d1 { - -class global_control { -public: - enum parameter { - max_allowed_parallelism, - thread_stack_size, - terminate_on_exception, -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - scheduler_handle, // not a public parameter -#else - reserved1, // not a public parameter -#endif - parameter_max // insert new parameters above this point - }; - - global_control(parameter p, std::size_t value) : - my_value(value), my_reserved(), my_param(p) { - suppress_unused_warning(my_reserved); - __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) - // For Windows 8 Store* apps it's impossible to set stack size - if (p==thread_stack_size) - return; -#elif __TBB_x86_64 && (_WIN32 || _WIN64) - if (p==thread_stack_size) - __TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range"); -#endif - if (my_param==max_allowed_parallelism) - __TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0."); - r1::create(*this); - } - - ~global_control() { - __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) - // For Windows 8 Store* apps it's impossible to set stack size - if (my_param==thread_stack_size) - return; -#endif - r1::destroy(*this); - } - - static std::size_t active_value(parameter p) { - __TBB_ASSERT(p < parameter_max, "Invalid parameter"); - return r1::global_control_active_value((int)p); - } - -private: - std::size_t my_value; - std::intptr_t my_reserved; // TODO: substitution of global_control* not to break backward compatibility - parameter my_param; - - friend struct r1::global_control_impl; - friend struct r1::control_storage_comparator; -}; - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -//! Finalization options. -//! Outside of the class to avoid extensive friendship. -static constexpr std::intptr_t release_nothrowing = 0; -static constexpr std::intptr_t finalize_nothrowing = 1; -static constexpr std::intptr_t finalize_throwing = 2; - -//! User side wrapper for a task scheduler lifetime control object -class task_scheduler_handle { -public: - task_scheduler_handle() = default; - ~task_scheduler_handle() { - release(*this); - } - - //! No copy - task_scheduler_handle(const task_scheduler_handle& other) = delete; - task_scheduler_handle& operator=(const task_scheduler_handle& other) = delete; - - //! 
Move only - task_scheduler_handle(task_scheduler_handle&& other) noexcept : m_ctl{nullptr} { - std::swap(m_ctl, other.m_ctl); - } - task_scheduler_handle& operator=(task_scheduler_handle&& other) noexcept { - std::swap(m_ctl, other.m_ctl); - return *this; - }; - - //! Get and active instance of task_scheduler_handle - static task_scheduler_handle get() { - task_scheduler_handle handle; - r1::get(handle); - return handle; - } - - //! Release the reference and deactivate handle - static void release(task_scheduler_handle& handle) { - if (handle.m_ctl != nullptr) { - r1::finalize(handle, release_nothrowing); - } - } - -private: - friend void r1::release_impl(task_scheduler_handle& handle); - friend bool r1::finalize_impl(task_scheduler_handle& handle); - friend void __TBB_EXPORTED_FUNC r1::get(task_scheduler_handle&); - - global_control* m_ctl{nullptr}; -}; - -#if TBB_USE_EXCEPTIONS -//! Waits for worker threads termination. Throws exception on error. -inline void finalize(task_scheduler_handle& handle) { - r1::finalize(handle, finalize_throwing); -} -#endif -//! Waits for worker threads termination. Returns false on error. -inline bool finalize(task_scheduler_handle& handle, const std::nothrow_t&) noexcept { - return r1::finalize(handle, finalize_nothrowing); -} -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::global_control; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -using detail::d1::finalize; -using detail::d1::task_scheduler_handle; -using detail::r1::unsafe_wait; -#endif -} // namespace v1 - -} // namespace tbb - -#endif // __TBB_global_control_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_global_control_H +#define __TBB_global_control_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_template_helpers.h" +#include "detail/_exception.h" + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +#include <new> // std::nothrow_t +#endif +#include <cstddef> + +namespace tbb { +namespace detail { + +namespace d1 { +class global_control; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +class task_scheduler_handle; +#endif +} + +namespace r1 { +void __TBB_EXPORTED_FUNC create(d1::global_control&); +void __TBB_EXPORTED_FUNC destroy(d1::global_control&); +std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int); +struct global_control_impl; +struct control_storage_comparator; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +void release_impl(d1::task_scheduler_handle& handle); +bool finalize_impl(d1::task_scheduler_handle& handle); +void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&); +bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode); +#endif +} + +namespace d1 { + +class global_control { +public: + enum parameter { + max_allowed_parallelism, + thread_stack_size, + terminate_on_exception, +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + scheduler_handle, // not a public parameter +#else + reserved1, // not a public parameter +#endif + parameter_max // insert new parameters above this point + }; + + global_control(parameter p, std::size_t value) : + my_value(value), my_reserved(), my_param(p) { + suppress_unused_warning(my_reserved); + __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + // For Windows 8 Store* apps it's impossible to set stack size + if (p==thread_stack_size) + return; +#elif __TBB_x86_64 && (_WIN32 || _WIN64) + if (p==thread_stack_size) + __TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range"); +#endif + if (my_param==max_allowed_parallelism) + __TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0."); + r1::create(*this); + } + + ~global_control() { + __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + // For Windows 8 Store* apps it's impossible to set stack size + if (my_param==thread_stack_size) + return; +#endif + r1::destroy(*this); + } + + static std::size_t active_value(parameter p) { + __TBB_ASSERT(p < parameter_max, "Invalid parameter"); + return r1::global_control_active_value((int)p); + } + +private: + std::size_t my_value; + std::intptr_t my_reserved; // TODO: substitution of global_control* not to break backward compatibility + parameter my_param; + + friend struct r1::global_control_impl; + friend struct r1::control_storage_comparator; +}; + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +//! Finalization options. +//! Outside of the class to avoid extensive friendship. +static constexpr std::intptr_t release_nothrowing = 0; +static constexpr std::intptr_t finalize_nothrowing = 1; +static constexpr std::intptr_t finalize_throwing = 2; + +//! User side wrapper for a task scheduler lifetime control object +class task_scheduler_handle { +public: + task_scheduler_handle() = default; + ~task_scheduler_handle() { + release(*this); + } + + //! No copy + task_scheduler_handle(const task_scheduler_handle& other) = delete; + task_scheduler_handle& operator=(const task_scheduler_handle& other) = delete; + + //! 
Move only + task_scheduler_handle(task_scheduler_handle&& other) noexcept : m_ctl{nullptr} { + std::swap(m_ctl, other.m_ctl); + } + task_scheduler_handle& operator=(task_scheduler_handle&& other) noexcept { + std::swap(m_ctl, other.m_ctl); + return *this; + }; + + //! Get and active instance of task_scheduler_handle + static task_scheduler_handle get() { + task_scheduler_handle handle; + r1::get(handle); + return handle; + } + + //! Release the reference and deactivate handle + static void release(task_scheduler_handle& handle) { + if (handle.m_ctl != nullptr) { + r1::finalize(handle, release_nothrowing); + } + } + +private: + friend void r1::release_impl(task_scheduler_handle& handle); + friend bool r1::finalize_impl(task_scheduler_handle& handle); + friend void __TBB_EXPORTED_FUNC r1::get(task_scheduler_handle&); + + global_control* m_ctl{nullptr}; +}; + +#if TBB_USE_EXCEPTIONS +//! Waits for worker threads termination. Throws exception on error. +inline void finalize(task_scheduler_handle& handle) { + r1::finalize(handle, finalize_throwing); +} +#endif +//! Waits for worker threads termination. Returns false on error. +inline bool finalize(task_scheduler_handle& handle, const std::nothrow_t&) noexcept { + return r1::finalize(handle, finalize_nothrowing); +} +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::global_control; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +using detail::d1::finalize; +using detail::d1::task_scheduler_handle; +using detail::r1::unsafe_wait; +#endif +} // namespace v1 + +} // namespace tbb + +#endif // __TBB_global_control_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/info.h b/contrib/libs/tbb/include/oneapi/tbb/info.h index 21475a4d00..f08a7a8a9e 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/info.h +++ b/contrib/libs/tbb/include/oneapi/tbb/info.h @@ -1,137 +1,137 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_info_H -#define __TBB_info_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -#if __TBB_ARENA_BINDING -#include <vector> - -namespace tbb { -namespace detail { - -namespace d1{ - -using numa_node_id = int; -using core_type_id = int; - -// TODO: consider version approach to resolve backward compatibility potential issues. 
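Editorial note, not part of the diff: the global_control and task_scheduler_handle interface restored in the hunk above is typically used as in the minimal sketch below. The <oneapi/tbb/...> include layout, the limit of 4 threads, and the printed message are illustrative assumptions; the task_scheduler_handle part exists only when the library is built with __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE, so it is shown commented out.

// Minimal usage sketch for the global_control API shown above.
#include <oneapi/tbb/global_control.h>
#include <oneapi/tbb/parallel_for.h>
#include <cstddef>
#include <cstdio>

int main() {
    // While this object is alive, TBB will not use more than 4 threads.
    tbb::global_control limit(tbb::global_control::max_allowed_parallelism, 4);

    // active_value() reports the value currently in effect for a parameter.
    std::size_t active =
        tbb::global_control::active_value(tbb::global_control::max_allowed_parallelism);
    std::printf("max_allowed_parallelism is now %zu\n", active);

    tbb::parallel_for(0, 1000, [](int) { /* some work */ });

    // When worker-waiting support is compiled in, shutdown can be made deterministic:
    //   auto handle = tbb::task_scheduler_handle::get();
    //   ... run parallel work ...
    //   tbb::finalize(handle, std::nothrow);  // waits for workers; returns false on error
    return 0;
}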
-struct constraints { -#if !__TBB_CPP20_PRESENT - constraints(numa_node_id id = -1, int maximal_concurrency = -1) - : numa_id(id) - , max_concurrency(maximal_concurrency) -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - , core_type(-1) - , max_threads_per_core(-1) -#endif - {} -#endif /*!__TBB_CPP20_PRESENT*/ - - constraints& set_numa_id(numa_node_id id) { - numa_id = id; - return *this; - } - constraints& set_max_concurrency(int maximal_concurrency) { - max_concurrency = maximal_concurrency; - return *this; - } -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - constraints& set_core_type(core_type_id id) { - core_type = id; - return *this; - } - constraints& set_max_threads_per_core(int threads_number) { - max_threads_per_core = threads_number; - return *this; - } -#endif - - numa_node_id numa_id = -1; - int max_concurrency = -1; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - core_type_id core_type = -1; - int max_threads_per_core = -1; -#endif -}; - -} // namespace d1 - -namespace r1 { -unsigned __TBB_EXPORTED_FUNC numa_node_count(); -void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array); -int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id); - -// Reserved fields are required to save binary backward compatibility in case of future changes. -// They must be defined to 0 at this moment. -unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0); -void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0); - -int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0); -int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0); -} // namespace r1 - -namespace d1 { - -inline std::vector<numa_node_id> numa_nodes() { - std::vector<numa_node_id> node_indices(r1::numa_node_count()); - r1::fill_numa_indices(node_indices.data()); - return node_indices; -} - -inline int default_concurrency(numa_node_id id = -1) { - return r1::numa_default_concurrency(id); -} - -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT -inline std::vector<core_type_id> core_types() { - std::vector<int> core_type_indexes(r1::core_type_count()); - r1::fill_core_type_indices(core_type_indexes.data()); - return core_type_indexes; -} - -inline int default_concurrency(constraints c) { - return r1::constraints_default_concurrency(c); -} -#endif /*__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::numa_node_id; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT -using detail::d1::core_type_id; -#endif - -namespace info { -using detail::d1::numa_nodes; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT -using detail::d1::core_types; -#endif - -using detail::d1::default_concurrency; -} // namespace info -} // namespace v1 - -} // namespace tbb - -#endif /*__TBB_ARENA_BINDING*/ - -#endif /*__TBB_info_H*/ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_info_H +#define __TBB_info_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#if __TBB_ARENA_BINDING +#include <vector> + +namespace tbb { +namespace detail { + +namespace d1{ + +using numa_node_id = int; +using core_type_id = int; + +// TODO: consider version approach to resolve backward compatibility potential issues. +struct constraints { +#if !__TBB_CPP20_PRESENT + constraints(numa_node_id id = -1, int maximal_concurrency = -1) + : numa_id(id) + , max_concurrency(maximal_concurrency) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + , core_type(-1) + , max_threads_per_core(-1) +#endif + {} +#endif /*!__TBB_CPP20_PRESENT*/ + + constraints& set_numa_id(numa_node_id id) { + numa_id = id; + return *this; + } + constraints& set_max_concurrency(int maximal_concurrency) { + max_concurrency = maximal_concurrency; + return *this; + } +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + constraints& set_core_type(core_type_id id) { + core_type = id; + return *this; + } + constraints& set_max_threads_per_core(int threads_number) { + max_threads_per_core = threads_number; + return *this; + } +#endif + + numa_node_id numa_id = -1; + int max_concurrency = -1; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + core_type_id core_type = -1; + int max_threads_per_core = -1; +#endif +}; + +} // namespace d1 + +namespace r1 { +unsigned __TBB_EXPORTED_FUNC numa_node_count(); +void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array); +int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id); + +// Reserved fields are required to save binary backward compatibility in case of future changes. +// They must be defined to 0 at this moment. 
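Editorial note, not part of the diff: the info.h interface in this hunk (tbb::info::numa_nodes, tbb::info::default_concurrency, and the fluent constraints builder) is commonly combined with tbb::task_arena as sketched below. The arena-binding part assumes a build with __TBB_ARENA_BINDING enabled, and the include paths are only one possible layout.

// Sketch: enumerate NUMA nodes and pin an arena to the first one.
#include <oneapi/tbb/info.h>
#include <oneapi/tbb/task_arena.h>
#include <cstdio>
#include <vector>

int main() {
    std::vector<tbb::numa_node_id> nodes = tbb::info::numa_nodes();
    for (tbb::numa_node_id id : nodes) {
        std::printf("NUMA node %d: default concurrency %d\n",
                    id, tbb::info::default_concurrency(id));
    }

    if (!nodes.empty()) {
        // constraints is a fluent builder; task_arena accepts it to restrict placement.
        tbb::task_arena numa_arena(
            tbb::task_arena::constraints{}.set_numa_id(nodes.front()));
        numa_arena.execute([] { /* work executed by threads bound to that node */ });
    }
    return 0;
}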
+unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0); +void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0); + +int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0); +int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0); +} // namespace r1 + +namespace d1 { + +inline std::vector<numa_node_id> numa_nodes() { + std::vector<numa_node_id> node_indices(r1::numa_node_count()); + r1::fill_numa_indices(node_indices.data()); + return node_indices; +} + +inline int default_concurrency(numa_node_id id = -1) { + return r1::numa_default_concurrency(id); +} + +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +inline std::vector<core_type_id> core_types() { + std::vector<int> core_type_indexes(r1::core_type_count()); + r1::fill_core_type_indices(core_type_indexes.data()); + return core_type_indexes; +} + +inline int default_concurrency(constraints c) { + return r1::constraints_default_concurrency(c); +} +#endif /*__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::numa_node_id; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +using detail::d1::core_type_id; +#endif + +namespace info { +using detail::d1::numa_nodes; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +using detail::d1::core_types; +#endif + +using detail::d1::default_concurrency; +} // namespace info +} // namespace v1 + +} // namespace tbb + +#endif /*__TBB_ARENA_BINDING*/ + +#endif /*__TBB_info_H*/ diff --git a/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h b/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h index 6e913c6713..667d70103f 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h +++ b/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h @@ -1,272 +1,272 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_memory_pool_H -#define __TBB_memory_pool_H - -#if !TBB_PREVIEW_MEMORY_POOL -#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h -#endif -/** @file */ - -#include "scalable_allocator.h" - -#include <new> // std::bad_alloc -#include <stdexcept> // std::runtime_error, std::invalid_argument -#include <utility> // std::forward - - -#if __TBB_EXTRA_DEBUG -#define __TBBMALLOC_ASSERT ASSERT -#else -#define __TBBMALLOC_ASSERT(a,b) ((void)0) -#endif - -namespace tbb { -namespace detail { -namespace d1 { - -//! Base of thread-safe pool allocator for variable-size requests -class pool_base : no_copy { - // Pool interface is separate from standard allocator classes because it has - // to maintain internal state, no copy or assignment. Move and swap are possible. -public: - //! Reset pool to reuse its memory (free all objects at once) - void recycle() { rml::pool_reset(my_pool); } - - //! 
The "malloc" analogue to allocate block of memory of size bytes - void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); } - - //! The "free" analogue to discard a previously allocated piece of memory. - void free(void* ptr) { rml::pool_free(my_pool, ptr); } - - //! The "realloc" analogue complementing pool_malloc. - // Enables some low-level optimization possibilities - void *realloc(void* ptr, size_t size) { - return rml::pool_realloc(my_pool, ptr, size); - } - -protected: - //! destroy pool - must be called in a child class - void destroy() { rml::pool_destroy(my_pool); } - - rml::MemoryPool *my_pool; -}; - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Workaround for erroneous "unreferenced parameter" warning in method destroy. - #pragma warning (push) - #pragma warning (disable: 4100) -#endif - -//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 -/** @ingroup memory_allocation */ -template<typename T, typename P = pool_base> -class memory_pool_allocator { -protected: - typedef P pool_type; - pool_type *my_pool; - template<typename U, typename R> - friend class memory_pool_allocator; - template<typename V, typename U, typename R> - friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); - template<typename V, typename U, typename R> - friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); -public: - typedef T value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - template<typename U> struct rebind { - typedef memory_pool_allocator<U, P> other; - }; - - explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {} - memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} - template<typename U> - memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - //! Allocate space for n objects. - pointer allocate( size_type n, const void* /*hint*/ = 0) { - pointer p = static_cast<pointer>( my_pool->malloc( n*sizeof(value_type) ) ); - if (!p) - throw_exception(std::bad_alloc()); - return p; - } - //! Free previously allocated block of memory. - void deallocate( pointer p, size_type ) { - my_pool->free(p); - } - //! Largest value for which method allocate might succeed. - size_type max_size() const throw() { - size_type max = static_cast<size_type>(-1) / sizeof (value_type); - return (max > 0 ? max : 1); - } - //! Copy-construct value at location pointed to by p. - - template<typename U, typename... Args> - void construct(U *p, Args&&... args) - { ::new((void *)p) U(std::forward<Args>(args)...); } - - //! Destroy value at location pointed to by p. - void destroy( pointer p ) { p->~value_type(); } - -}; - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4100 is back - -//! 
Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1 -/** @ingroup memory_allocation */ -template<typename P> -class memory_pool_allocator<void, P> { -public: - typedef P pool_type; - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; - template<typename U> struct rebind { - typedef memory_pool_allocator<U, P> other; - }; - - explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {} - memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} - template<typename U> - memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} - -protected: - pool_type *my_pool; - template<typename U, typename R> - friend class memory_pool_allocator; - template<typename V, typename U, typename R> - friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); - template<typename V, typename U, typename R> - friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); -}; - -template<typename T, typename U, typename P> -inline bool operator==( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool==b.my_pool;} - -template<typename T, typename U, typename P> -inline bool operator!=( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool!=b.my_pool;} - -//! Thread-safe growable pool allocator for variable-size requests -template <typename Alloc> -class memory_pool : public pool_base { - Alloc my_alloc; // TODO: base-class optimization - static void *allocate_request(intptr_t pool_id, size_t & bytes); - static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes); - -public: - //! construct pool with underlying allocator - explicit memory_pool(const Alloc &src = Alloc()); - - //! destroy pool - ~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc latter -}; - -class fixed_pool : public pool_base { - void *my_buffer; - size_t my_size; - inline static void *allocate_request(intptr_t pool_id, size_t & bytes); - -public: - //! construct pool with underlying allocator - inline fixed_pool(void *buf, size_t size); - //! destroy pool - ~fixed_pool() { destroy(); } -}; - -//////////////// Implementation /////////////// - -template <typename Alloc> -memory_pool<Alloc>::memory_pool(const Alloc &src) : my_alloc(src) { - rml::MemPoolPolicy args(allocate_request, deallocate_request, - sizeof(typename Alloc::value_type)); - rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); - if (res!=rml::POOL_OK) - throw_exception(std::runtime_error("Can't create pool")); -} -template <typename Alloc> -void *memory_pool<Alloc>::allocate_request(intptr_t pool_id, size_t & bytes) { - memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); - const size_t unit_size = sizeof(typename Alloc::value_type); - __TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL); - void *ptr; -#if TBB_USE_EXCEPTIONS - try { -#endif - ptr = self.my_alloc.allocate( bytes/unit_size ); -#if TBB_USE_EXCEPTIONS - } catch(...) { - return 0; - } -#endif - return ptr; -} -#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED - // Workaround for erroneous "unreachable code" warning in the template below. 
- // Specific for VC++ 17-18 compiler - #pragma warning (push) - #pragma warning (disable: 4702) -#endif -template <typename Alloc> -int memory_pool<Alloc>::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) { - memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); - const size_t unit_size = sizeof(typename Alloc::value_type); - __TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL); - self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size ); - return 0; -} -#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED - #pragma warning (pop) -#endif -inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) { - if (!buf || !size) - // TODO: improve support for mode with exceptions disabled - throw_exception(std::invalid_argument("Zero in parameter is invalid")); - rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true); - rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); - if (res!=rml::POOL_OK) - throw_exception(std::runtime_error("Can't create pool")); -} -inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) { - fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id); - __TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice."); - bytes = self.my_size; - self.my_size = 0; // remember that buffer has been used - return self.my_buffer; -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::memory_pool_allocator; -using detail::d1::memory_pool; -using detail::d1::fixed_pool; -} // inline namepspace v1 -} // namespace tbb - -#undef __TBBMALLOC_ASSERT -#endif// __TBB_memory_pool_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_memory_pool_H +#define __TBB_memory_pool_H + +#if !TBB_PREVIEW_MEMORY_POOL +#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h +#endif +/** @file */ + +#include "scalable_allocator.h" + +#include <new> // std::bad_alloc +#include <stdexcept> // std::runtime_error, std::invalid_argument +#include <utility> // std::forward + + +#if __TBB_EXTRA_DEBUG +#define __TBBMALLOC_ASSERT ASSERT +#else +#define __TBBMALLOC_ASSERT(a,b) ((void)0) +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +//! Base of thread-safe pool allocator for variable-size requests +class pool_base : no_copy { + // Pool interface is separate from standard allocator classes because it has + // to maintain internal state, no copy or assignment. Move and swap are possible. +public: + //! Reset pool to reuse its memory (free all objects at once) + void recycle() { rml::pool_reset(my_pool); } + + //! The "malloc" analogue to allocate block of memory of size bytes + void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); } + + //! The "free" analogue to discard a previously allocated piece of memory. + void free(void* ptr) { rml::pool_free(my_pool, ptr); } + + //! The "realloc" analogue complementing pool_malloc. 
+ // Enables some low-level optimization possibilities + void *realloc(void* ptr, size_t size) { + return rml::pool_realloc(my_pool, ptr, size); + } + +protected: + //! destroy pool - must be called in a child class + void destroy() { rml::pool_destroy(my_pool); } + + rml::MemoryPool *my_pool; +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Workaround for erroneous "unreferenced parameter" warning in method destroy. + #pragma warning (push) + #pragma warning (disable: 4100) +#endif + +//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 +/** @ingroup memory_allocation */ +template<typename T, typename P = pool_base> +class memory_pool_allocator { +protected: + typedef P pool_type; + pool_type *my_pool; + template<typename U, typename R> + friend class memory_pool_allocator; + template<typename V, typename U, typename R> + friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); + template<typename V, typename U, typename R> + friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); +public: + typedef T value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + template<typename U> struct rebind { + typedef memory_pool_allocator<U, P> other; + }; + + explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {} + memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} + template<typename U> + memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + //! Allocate space for n objects. + pointer allocate( size_type n, const void* /*hint*/ = 0) { + pointer p = static_cast<pointer>( my_pool->malloc( n*sizeof(value_type) ) ); + if (!p) + throw_exception(std::bad_alloc()); + return p; + } + //! Free previously allocated block of memory. + void deallocate( pointer p, size_type ) { + my_pool->free(p); + } + //! Largest value for which method allocate might succeed. + size_type max_size() const throw() { + size_type max = static_cast<size_type>(-1) / sizeof (value_type); + return (max > 0 ? max : 1); + } + //! Copy-construct value at location pointed to by p. + + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new((void *)p) U(std::forward<Args>(args)...); } + + //! Destroy value at location pointed to by p. + void destroy( pointer p ) { p->~value_type(); } + +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) +#endif // warning 4100 is back + +//! 
Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1 +/** @ingroup memory_allocation */ +template<typename P> +class memory_pool_allocator<void, P> { +public: + typedef P pool_type; + typedef void* pointer; + typedef const void* const_pointer; + typedef void value_type; + template<typename U> struct rebind { + typedef memory_pool_allocator<U, P> other; + }; + + explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {} + memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} + template<typename U> + memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} + +protected: + pool_type *my_pool; + template<typename U, typename R> + friend class memory_pool_allocator; + template<typename V, typename U, typename R> + friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); + template<typename V, typename U, typename R> + friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); +}; + +template<typename T, typename U, typename P> +inline bool operator==( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool==b.my_pool;} + +template<typename T, typename U, typename P> +inline bool operator!=( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool!=b.my_pool;} + +//! Thread-safe growable pool allocator for variable-size requests +template <typename Alloc> +class memory_pool : public pool_base { + Alloc my_alloc; // TODO: base-class optimization + static void *allocate_request(intptr_t pool_id, size_t & bytes); + static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes); + +public: + //! construct pool with underlying allocator + explicit memory_pool(const Alloc &src = Alloc()); + + //! destroy pool + ~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc latter +}; + +class fixed_pool : public pool_base { + void *my_buffer; + size_t my_size; + inline static void *allocate_request(intptr_t pool_id, size_t & bytes); + +public: + //! construct pool with underlying allocator + inline fixed_pool(void *buf, size_t size); + //! destroy pool + ~fixed_pool() { destroy(); } +}; + +//////////////// Implementation /////////////// + +template <typename Alloc> +memory_pool<Alloc>::memory_pool(const Alloc &src) : my_alloc(src) { + rml::MemPoolPolicy args(allocate_request, deallocate_request, + sizeof(typename Alloc::value_type)); + rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); + if (res!=rml::POOL_OK) + throw_exception(std::runtime_error("Can't create pool")); +} +template <typename Alloc> +void *memory_pool<Alloc>::allocate_request(intptr_t pool_id, size_t & bytes) { + memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); + const size_t unit_size = sizeof(typename Alloc::value_type); + __TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL); + void *ptr; +#if TBB_USE_EXCEPTIONS + try { +#endif + ptr = self.my_alloc.allocate( bytes/unit_size ); +#if TBB_USE_EXCEPTIONS + } catch(...) { + return 0; + } +#endif + return ptr; +} +#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED + // Workaround for erroneous "unreachable code" warning in the template below. 
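Editorial note, not part of the diff: the preview pool interface in this hunk is typically used as sketched below. TBB_PREVIEW_MEMORY_POOL must be defined before the include; the std::allocator backing, the request sizes, and the one-megabyte buffer are arbitrary choices for illustration.

// Sketch of memory_pool, memory_pool_allocator and fixed_pool usage (preview API).
#define TBB_PREVIEW_MEMORY_POOL 1
#include <oneapi/tbb/memory_pool.h>
#include <list>
#include <memory>   // std::allocator

int main() {
    // Growable pool backed by std::allocator<char>; recycle() releases all objects at once.
    tbb::memory_pool<std::allocator<char>> pool;
    void* raw = pool.malloc(256);    // "malloc" analogue
    raw = pool.realloc(raw, 512);    // "realloc" analogue
    pool.free(raw);                  // "free" analogue

    // The same pool can back a standard container through memory_pool_allocator.
    tbb::memory_pool_allocator<int> alloc(pool);
    std::list<int, tbb::memory_pool_allocator<int>> numbers(alloc);
    numbers.push_back(42);
    numbers.clear();
    pool.recycle();                  // free everything the pool handed out

    // fixed_pool serves all requests from a single user-supplied buffer.
    static char buffer[1024 * 1024];
    tbb::fixed_pool fpool(buffer, sizeof(buffer));
    void* p = fpool.malloc(64);
    fpool.free(p);
    return 0;
}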
+ // Specific for VC++ 17-18 compiler + #pragma warning (push) + #pragma warning (disable: 4702) +#endif +template <typename Alloc> +int memory_pool<Alloc>::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) { + memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); + const size_t unit_size = sizeof(typename Alloc::value_type); + __TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL); + self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size ); + return 0; +} +#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED + #pragma warning (pop) +#endif +inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) { + if (!buf || !size) + // TODO: improve support for mode with exceptions disabled + throw_exception(std::invalid_argument("Zero in parameter is invalid")); + rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true); + rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); + if (res!=rml::POOL_OK) + throw_exception(std::runtime_error("Can't create pool")); +} +inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) { + fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id); + __TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice."); + bytes = self.my_size; + self.my_size = 0; // remember that buffer has been used + return self.my_buffer; +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::memory_pool_allocator; +using detail::d1::memory_pool; +using detail::d1::fixed_pool; +} // inline namepspace v1 +} // namespace tbb + +#undef __TBBMALLOC_ASSERT +#endif// __TBB_memory_pool_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h index 8fab863db3..d0e9e3acbb 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h @@ -1,79 +1,79 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_null_mutex_H -#define __TBB_null_mutex_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! A mutex which does nothing -/** A null_mutex does no operation and simulates success. - @ingroup synchronization */ -class null_mutex { -public: - //! Constructors - constexpr null_mutex() noexcept = default; - - //! Destructor - ~null_mutex() = default; - - //! No Copy - null_mutex(const null_mutex&) = delete; - null_mutex& operator=(const null_mutex&) = delete; - - //! Represents acquisition of a mutex. - class scoped_lock { - public: - //! Constructors - constexpr scoped_lock() noexcept = default; - scoped_lock(null_mutex&) {} - - //! Destructor - ~scoped_lock() = default; - - //! 
No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - void acquire(null_mutex&) {} - bool try_acquire(null_mutex&) { return true; } - void release() {} - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = false; - static constexpr bool is_recursive_mutex = true; - static constexpr bool is_fair_mutex = true; - - void lock() {} - bool try_lock() { return true; } - void unlock() {} -}; // class null_mutex - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::null_mutex; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_null_mutex_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_null_mutex_H +#define __TBB_null_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A mutex which does nothing +/** A null_mutex does no operation and simulates success. + @ingroup synchronization */ +class null_mutex { +public: + //! Constructors + constexpr null_mutex() noexcept = default; + + //! Destructor + ~null_mutex() = default; + + //! No Copy + null_mutex(const null_mutex&) = delete; + null_mutex& operator=(const null_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + //! Constructors + constexpr scoped_lock() noexcept = default; + scoped_lock(null_mutex&) {} + + //! Destructor + ~scoped_lock() = default; + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + void acquire(null_mutex&) {} + bool try_acquire(null_mutex&) { return true; } + void release() {} + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = true; + static constexpr bool is_fair_mutex = true; + + void lock() {} + bool try_lock() { return true; } + void unlock() {} +}; // class null_mutex + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::null_mutex; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_null_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h index 8046bc405d..9d0f8da2a1 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h @@ -1,84 +1,84 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_null_rw_mutex_H -#define __TBB_null_rw_mutex_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! A rw mutex which does nothing -/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation. - @ingroup synchronization */ -class null_rw_mutex { -public: - //! Constructors - constexpr null_rw_mutex() noexcept = default; - - //! Destructor - ~null_rw_mutex() = default; - - //! No Copy - null_rw_mutex(const null_rw_mutex&) = delete; - null_rw_mutex& operator=(const null_rw_mutex&) = delete; - - //! Represents acquisition of a mutex. - class scoped_lock { - public: - //! Constructors - constexpr scoped_lock() noexcept = default; - scoped_lock(null_rw_mutex&, bool = true) {} - - //! Destructor - ~scoped_lock() = default; - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - void acquire(null_rw_mutex&, bool = true) {} - bool try_acquire(null_rw_mutex&, bool = true) { return true; } - void release() {} - bool upgrade_to_writer() { return true; } - bool downgrade_to_reader() { return true; } - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = true; - static constexpr bool is_recursive_mutex = true; - static constexpr bool is_fair_mutex = true; - - void lock() {} - bool try_lock() { return true; } - void unlock() {} - void lock_shared() {} - bool try_lock_shared() { return true; } - void unlock_shared() {} -}; // class null_rw_mutex - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::null_rw_mutex; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_null_rw_mutex_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_null_rw_mutex_H +#define __TBB_null_rw_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A rw mutex which does nothing +/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation. + @ingroup synchronization */ +class null_rw_mutex { +public: + //! Constructors + constexpr null_rw_mutex() noexcept = default; + + //! Destructor + ~null_rw_mutex() = default; + + //! No Copy + null_rw_mutex(const null_rw_mutex&) = delete; + null_rw_mutex& operator=(const null_rw_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + //! Constructors + constexpr scoped_lock() noexcept = default; + scoped_lock(null_rw_mutex&, bool = true) {} + + //! Destructor + ~scoped_lock() = default; + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + void acquire(null_rw_mutex&, bool = true) {} + bool try_acquire(null_rw_mutex&, bool = true) { return true; } + void release() {} + bool upgrade_to_writer() { return true; } + bool downgrade_to_reader() { return true; } + }; + + //! 
Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = true; + static constexpr bool is_fair_mutex = true; + + void lock() {} + bool try_lock() { return true; } + void unlock() {} + void lock_shared() {} + bool try_lock_shared() { return true; } + void unlock_shared() {} +}; // class null_rw_mutex + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::null_rw_mutex; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_null_rw_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h index ed137d4d09..0dc774e078 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h @@ -1,416 +1,416 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_for_H -#define __TBB_parallel_for_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_exception.h" -#include "detail/_task.h" -#include "detail/_small_object_pool.h" -#include "profiling.h" - -#include "partitioner.h" -#include "blocked_range.h" -#include "task_group.h" - -#include <cstddef> -#include <new> - -namespace tbb { -namespace detail { -namespace d1 { - -//! Task type used in parallel_for -/** @ingroup algorithms */ -template<typename Range, typename Body, typename Partitioner> -struct start_for : public task { - Range my_range; - const Body my_body; - node* my_parent; - - typename Partitioner::task_partition_type my_partition; - small_object_allocator my_allocator; - - task* execute(execution_data&) override; - task* cancel(execution_data&) override; - void finalize(const execution_data&); - - //! Constructor for root task. - start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : - my_range(range), - my_body(body), - my_partition(partitioner), - my_allocator(alloc) {} - //! Splitting constructor used to generate children. - /** parent_ becomes left child. Newly constructed object is right child. */ - start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : - my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split_obj), - my_allocator(alloc) {} - //! Construct right child from the given range as response to the demand. - /** parent_ remains left child. Newly constructed object is right child. 
*/ - start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : - my_range(r), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split()), - my_allocator(alloc) - { - my_partition.align_depth( d ); - } - static void run(const Range& range, const Body& body, Partitioner& partitioner) { - task_group_context context(PARALLEL_FOR); - run(range, body, partitioner, context); - } - - static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) { - if ( !range.empty() ) { - small_object_allocator alloc{}; - start_for& for_task = *alloc.new_object<start_for>(range, body, partitioner, alloc); - - // defer creation of the wait node until task allocation succeeds - wait_node wn; - for_task.my_parent = &wn; - execute_and_wait(for_task, context, wn.m_wait, context); - } - } - //! Run body for range, serves as callback for partitioner - void run_body( Range &r ) { - my_body( r ); - } - - //! spawn right task, serves as callback for partitioner - void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { - offer_work_impl(ed, *this, split_obj); - } - - //! spawn right task, serves as callback for partitioner - void offer_work(const Range& r, depth_t d, execution_data& ed) { - offer_work_impl(ed, *this, r, d); - } - -private: - template <typename... Args> - void offer_work_impl(execution_data& ed, Args&&... constructor_args) { - // New right child - small_object_allocator alloc{}; - start_for& right_child = *alloc.new_object<start_for>(ed, std::forward<Args>(constructor_args)..., alloc); - - // New root node as a continuation and ref count. Left and right child attach to the new parent. - right_child.my_parent = my_parent = alloc.new_object<tree_node>(ed, my_parent, 2, alloc); - // Spawn the right sibling - right_child.spawn_self(ed); - } - - void spawn_self(execution_data& ed) { - my_partition.spawn_task(*this, *context(ed)); - } -}; - -//! fold the tree and deallocate the task -template<typename Range, typename Body, typename Partitioner> -void start_for<Range, Body, Partitioner>::finalize(const execution_data& ed) { - // Get the current parent and allocator an object destruction - node* parent = my_parent; - auto allocator = my_allocator; - // Task execution finished - destroy it - this->~start_for(); - // Unwind the tree decrementing the parent`s reference count - - fold_tree<tree_node>(parent, ed); - allocator.deallocate(this, ed); - -} - -//! execute task for parallel_for -template<typename Range, typename Body, typename Partitioner> -task* start_for<Range, Body, Partitioner>::execute(execution_data& ed) { - if (!is_same_affinity(ed)) { - my_partition.note_affinity(execution_slot(ed)); - } - my_partition.check_being_stolen(*this, ed); - my_partition.execute(*this, my_range, ed); - finalize(ed); - return nullptr; -} - -//! cancel task for parallel_for -template<typename Range, typename Body, typename Partitioner> -task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) { - finalize(ed); - return nullptr; -} - -//! 
Calls the function with values from range [begin, end) with a step provided -template<typename Function, typename Index> -class parallel_for_body : detail::no_assign { - const Function &my_func; - const Index my_begin; - const Index my_step; -public: - parallel_for_body( const Function& _func, Index& _begin, Index& _step ) - : my_func(_func), my_begin(_begin), my_step(_step) {} - - void operator()( const blocked_range<Index>& r ) const { - // A set of local variables to help the compiler with vectorization of the following loop. - Index b = r.begin(); - Index e = r.end(); - Index ms = my_step; - Index k = my_begin + b*ms; - -#if __INTEL_COMPILER -#pragma ivdep -#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE -#pragma vector always assert -#endif -#endif - for ( Index i = b; i < e; ++i, k += ms ) { - my_func( k ); - } - } -}; - -// Requirements on Range concept are documented in blocked_range.h - -/** \page parallel_for_body_req Requirements on parallel_for body - Class \c Body implementing the concept of parallel_for body must define: - - \code Body::Body( const Body& ); \endcode Copy constructor - - \code Body::~Body(); \endcode Destructor - - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r. -**/ - -/** \name parallel_for - See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/ -//@{ - -//! Parallel iteration over range with default partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body ) { - start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); -} - -//! Parallel iteration over range with simple partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { - start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner); -} - -//! Parallel iteration over range with auto_partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { - start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner); -} - -//! Parallel iteration over range with static_partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { - start_for<Range,Body,const static_partitioner>::run(range,body,partitioner); -} - -//! Parallel iteration over range with affinity_partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { - start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner); -} - -//! Parallel iteration over range with default partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, task_group_context& context ) { - start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); -} - -//! Parallel iteration over range with simple partitioner and user-supplied context. 
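Editorial note, not part of the diff: the range-based overloads listed above are typically called as in the sketch below; blocked_range over std::size_t, the lambda bodies, and the choice of static_partitioner are illustrative assumptions only.

// Sketch: parallel_for over a blocked_range, with and without an explicit partitioner.
#include <oneapi/tbb/parallel_for.h>
#include <oneapi/tbb/blocked_range.h>
#include <cstddef>
#include <vector>

int main() {
    std::vector<float> data(1000, 1.0f);

    // Default partitioner: the body receives sub-ranges of the blocked_range.
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, data.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                data[i] *= 2.0f;
        });

    // The same call with an explicit partitioner passed as the third argument.
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, data.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                data[i] += 1.0f;
        },
        tbb::static_partitioner{});
    return 0;
}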
-/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { - start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with auto_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { - start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with static_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { - start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context); -} - -//! Parallel iteration over range with affinity_partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { - start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context); -} - -//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner -template <typename Index, typename Function, typename Partitioner> -void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { - if (step <= 0 ) - throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument - else if (last > first) { - // Above "else" avoids "potential divide by zero" warning on some platforms - Index end = (last - first - Index(1)) / step + Index(1); - blocked_range<Index> range(static_cast<Index>(0), end); - parallel_for_body<Function, Index> body(f, first, step); - parallel_for(range, body, partitioner); - } -} - -//! Parallel iteration over a range of integers with a step provided and default partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); -} -//! Parallel iteration over a range of integers with a step provided and simple partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { - parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner); -} -//! Parallel iteration over a range of integers with a step provided and auto partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner); -} -//! Parallel iteration over a range of integers with a step provided and static partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { - parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner); -} -//! 
Parallel iteration over a range of integers with a step provided and affinity partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { - parallel_for_impl(first, last, step, f, partitioner); -} - -//! Parallel iteration over a range of integers with a default step value and default partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); -} -//! Parallel iteration over a range of integers with a default step value and simple partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { - parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and auto partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and static partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { - parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner); -} -//! Parallel iteration over a range of integers with a default step value and affinity partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { - parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner); -} - -//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner -template <typename Index, typename Function, typename Partitioner> -void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) { - if (step <= 0 ) - throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument - else if (last > first) { - // Above "else" avoids "potential divide by zero" warning on some platforms - Index end = (last - first - Index(1)) / step + Index(1); - blocked_range<Index> range(static_cast<Index>(0), end); - parallel_for_body<Function, Index> body(f, first, step); - parallel_for(range, body, partitioner, context); - } -} - -//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); -} -//! 
Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context); -} -//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { - parallel_for_impl(first, last, step, f, partitioner, context); -} - -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); -} -//! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) { - parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); -} -//! 
Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner -template <typename Index, typename Function> -void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { - parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context); -} -// @} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::parallel_for; -// Split types -using detail::split; -using detail::proportional_split; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_parallel_for_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_for_H +#define __TBB_parallel_for_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" +#include "profiling.h" + +#include "partitioner.h" +#include "blocked_range.h" +#include "task_group.h" + +#include <cstddef> +#include <new> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Task type used in parallel_for +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_for : public task { + Range my_range; + const Body my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + //! Constructor for root task. + start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : + my_range(range), + my_body(body), + my_partition(partitioner), + my_allocator(alloc) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. */ + start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc) {} + //! Construct right child from the given range as response to the demand. + /** parent_ remains left child. Newly constructed object is right child. 
*/ + start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : + my_range(r), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split()), + my_allocator(alloc) + { + my_partition.align_depth( d ); + } + static void run(const Range& range, const Body& body, Partitioner& partitioner) { + task_group_context context(PARALLEL_FOR); + run(range, body, partitioner, context); + } + + static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + small_object_allocator alloc{}; + start_for& for_task = *alloc.new_object<start_for>(range, body, partitioner, alloc); + + // defer creation of the wait node until task allocation succeeds + wait_node wn; + for_task.my_parent = &wn; + execute_and_wait(for_task, context, wn.m_wait, context); + } + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + my_body( r ); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(const Range& r, depth_t d, execution_data& ed) { + offer_work_impl(ed, *this, r, d); + } + +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... constructor_args) { + // New right child + small_object_allocator alloc{}; + start_for& right_child = *alloc.new_object<start_for>(ed, std::forward<Args>(constructor_args)..., alloc); + + // New root node as a continuation and ref count. Left and right child attach to the new parent. + right_child.my_parent = my_parent = alloc.new_object<tree_node>(ed, my_parent, 2, alloc); + // Spawn the right sibling + right_child.spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_for<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and allocator an object destruction + node* parent = my_parent; + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_for(); + // Unwind the tree decrementing the parent`s reference count + + fold_tree<tree_node>(parent, ed); + allocator.deallocate(this, ed); + +} + +//! execute task for parallel_for +template<typename Range, typename Body, typename Partitioner> +task* start_for<Range, Body, Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + my_partition.execute(*this, my_range, ed); + finalize(ed); + return nullptr; +} + +//! cancel task for parallel_for +template<typename Range, typename Body, typename Partitioner> +task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//! 
Calls the function with values from range [begin, end) with a step provided +template<typename Function, typename Index> +class parallel_for_body : detail::no_assign { + const Function &my_func; + const Index my_begin; + const Index my_step; +public: + parallel_for_body( const Function& _func, Index& _begin, Index& _step ) + : my_func(_func), my_begin(_begin), my_step(_step) {} + + void operator()( const blocked_range<Index>& r ) const { + // A set of local variables to help the compiler with vectorization of the following loop. + Index b = r.begin(); + Index e = r.end(); + Index ms = my_step; + Index k = my_begin + b*ms; + +#if __INTEL_COMPILER +#pragma ivdep +#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE +#pragma vector always assert +#endif +#endif + for ( Index i = b; i < e; ++i, k += ms ) { + my_func( k ); + } + } +}; + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_for_body_req Requirements on parallel_for body + Class \c Body implementing the concept of parallel_for body must define: + - \code Body::Body( const Body& ); \endcode Copy constructor + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r. +**/ + +/** \name parallel_for + See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/ +//@{ + +//! Parallel iteration over range with default partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body ) { + start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); +} + +//! Parallel iteration over range with simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { + start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with auto_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { + start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with static_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { + start_for<Range,Body,const static_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with affinity_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { + start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, task_group_context& context ) { + start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); +} + +//! Parallel iteration over range with simple partitioner and user-supplied context. 
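// Editorial note: a minimal usage sketch of the parallel_for overloads declared in this
// header, not part of the diff itself. The scale_all() function, the vector of floats and
// the doubling lambda are illustrative assumptions; only the tbb:: calls come from the API above.
#include "oneapi/tbb/parallel_for.h"
#include "oneapi/tbb/blocked_range.h"
#include "oneapi/tbb/task_group.h"
#include <cstddef>
#include <vector>

void scale_all(std::vector<float>& data) {
    tbb::task_group_context ctx;     // optional user-supplied context, cancellable from outside
    tbb::affinity_partitioner ap;    // mutable partitioner, reusable across calls for cache affinity

    // Range-based form: Body::operator()(const blocked_range<std::size_t>&) const
    tbb::parallel_for(
        tbb::blocked_range<std::size_t>(0, data.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                data[i] *= 2.0f;
        },
        ap, ctx);

    // Compact integer form with an explicit step; uses auto_partitioner by default
    tbb::parallel_for(std::size_t(0), data.size(), std::size_t(2),
                      [&](std::size_t i) { data[i] += 1.0f; });
}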
+/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with auto_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with static_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with affinity_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context); +} + +//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner +template <typename Index, typename Function, typename Partitioner> +void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { + if (step <= 0 ) + throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument + else if (last > first) { + // Above "else" avoids "potential divide by zero" warning on some platforms + Index end = (last - first - Index(1)) / step + Index(1); + blocked_range<Index> range(static_cast<Index>(0), end); + parallel_for_body<Function, Index> body(f, first, step); + parallel_for(range, body, partitioner); + } +} + +//! Parallel iteration over a range of integers with a step provided and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); +} +//! Parallel iteration over a range of integers with a step provided and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner); +} +//! Parallel iteration over a range of integers with a step provided and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner); +} +//! Parallel iteration over a range of integers with a step provided and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner); +} +//! 
Parallel iteration over a range of integers with a step provided and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { + parallel_for_impl(first, last, step, f, partitioner); +} + +//! Parallel iteration over a range of integers with a default step value and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); +} +//! Parallel iteration over a range of integers with a default step value and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { + parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner); +} + +//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner +template <typename Index, typename Function, typename Partitioner> +void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) { + if (step <= 0 ) + throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument + else if (last > first) { + // Above "else" avoids "potential divide by zero" warning on some platforms + Index end = (last - first - Index(1)) / step + Index(1); + blocked_range<Index> range(static_cast<Index>(0), end); + parallel_for_body<Function, Index> body(f, first, step); + parallel_for(range, body, partitioner, context); + } +} + +//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); +} +//! 
Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { + parallel_for_impl(first, last, step, f, partitioner, context); +} + +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! 
Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { + parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context); +} +// @} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_for; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_for_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h index 563e00f5fc..0050b1746c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h @@ -1,644 +1,644 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_for_each_H -#define __TBB_parallel_for_each_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_exception.h" -#include "detail/_task.h" -#include "detail/_aligned_space.h" -#include "detail/_small_object_pool.h" - -#include "parallel_for.h" -#include "task_group.h" // task_group_context - -#include <iterator> -#include <type_traits> - -namespace tbb { -namespace detail { -namespace d2 { -template<typename Body, typename Item> class feeder_impl; -} // namespace d2 - -namespace d1 { -//! Class the user supplied algorithm body uses to add new tasks -template<typename Item> -class feeder { - feeder() {} - feeder(const feeder&) = delete; - void operator=( const feeder&) = delete; - - virtual ~feeder () {} - virtual void internal_add_copy(const Item& item) = 0; - virtual void internal_add_move(Item&& item) = 0; - - template<typename Body_, typename Item_> friend class detail::d2::feeder_impl; -public: - //! Add a work item to a running parallel_for_each. - void add(const Item& item) {internal_add_copy(item);} - void add(Item&& item) {internal_add_move(std::move(item));} -}; - -} // namespace d1 - -namespace d2 { -using namespace tbb::detail::d1; -/** Selects one of the two possible forms of function call member operator. 
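// Editorial note: a minimal sketch, not part of the diff, of the two body forms this
// selector distinguishes: a body taking only the item, and a body that also receives a
// feeder for adding work on the fly. The drain() function and the integer work list are
// illustrative assumptions; tbb::parallel_for_each and tbb::feeder are the APIs shown here.
#include "oneapi/tbb/parallel_for_each.h"
#include <vector>

void drain(std::vector<int>& work) {
    // Plain form: the body receives only the item.
    tbb::parallel_for_each(work.begin(), work.end(),
                           [](int x) { (void)x; /* process x */ });

    // Feeder form: the body may enqueue additional items into the running algorithm.
    tbb::parallel_for_each(work.begin(), work.end(),
                           [](int x, tbb::feeder<int>& f) {
                               if (x > 0)
                                   f.add(x - 1);  // dynamically added work item
                           });
}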
- @ingroup algorithms **/ -template<class Body> -struct parallel_for_each_operator_selector { -public: - template<typename ItemArg, typename FeederArg> - static auto call(const Body& body, ItemArg&& item, FeederArg*) - -> decltype(body(std::forward<ItemArg>(item)), void()) { - #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Suppression of Microsoft non-standard extension warnings - #pragma warning (push) - #pragma warning (disable: 4239) - #endif - - body(std::forward<ItemArg>(item)); - - #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (push) - #endif - } - - template<typename ItemArg, typename FeederArg> - static auto call(const Body& body, ItemArg&& item, FeederArg* feeder) - -> decltype(body(std::forward<ItemArg>(item), *feeder), void()) { - #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Suppression of Microsoft non-standard extension warnings - #pragma warning (push) - #pragma warning (disable: 4239) - #endif - __TBB_ASSERT(feeder, "Feeder was not created but should be"); - - body(std::forward<ItemArg>(item), *feeder); - - #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (push) - #endif - } -}; - -template<typename Body, typename Item> -struct feeder_item_task: public task { - using feeder_type = feeder_impl<Body, Item>; - - template <typename ItemType> - feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : - item(std::forward<ItemType>(input_item)), - my_feeder(feeder), - my_allocator(alloc) - {} - - void finalize(const execution_data& ed) { - my_feeder.my_wait_context.release(); - my_allocator.delete_object(this, ed); - } - - //! Hack for resolve ambiguity between calls to the body with and without moving the stored copy - //! Executing body with moving the copy should have higher priority - using first_priority = int; - using second_priority = double; - - template <typename BodyType, typename ItemType, typename FeederType> - static auto call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, first_priority) - -> decltype(parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder), void()) - { - parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder); - } - - template <typename BodyType, typename ItemType, typename FeederType> - static void call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, second_priority) { - parallel_for_each_operator_selector<Body>::call(call_body, call_item, &call_feeder); - } - - task* execute(execution_data& ed) override { - call(my_feeder.my_body, item, my_feeder, first_priority{}); - finalize(ed); - return nullptr; - } - - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } - - Item item; - feeder_type& my_feeder; - small_object_allocator my_allocator; -}; // class feeder_item_task - -/** Implements new task adding procedure. 
- @ingroup algorithms **/ -template<typename Body, typename Item> -class feeder_impl : public feeder<Item> { - // Avoiding use of copy constructor in a virtual method if the type does not support it - void internal_add_copy_impl(std::true_type, const Item& item) { - using feeder_task = feeder_item_task<Body, Item>; - small_object_allocator alloc; - auto task = alloc.new_object<feeder_task>(item, *this, alloc); - - my_wait_context.reserve(); - spawn(*task, my_execution_context); - } - - void internal_add_copy_impl(std::false_type, const Item&) { - __TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object"); - } - - void internal_add_copy(const Item& item) override { - internal_add_copy_impl(typename std::is_copy_constructible<Item>::type(), item); - } - - void internal_add_move(Item&& item) override { - using feeder_task = feeder_item_task<Body, Item>; - small_object_allocator alloc{}; - auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc); - - my_wait_context.reserve(); - spawn(*task, my_execution_context); - } -public: - feeder_impl(const Body& body, wait_context& w_context, task_group_context &context) - : my_body(body), - my_wait_context(w_context) - , my_execution_context(context) - {} - - const Body& my_body; - wait_context& my_wait_context; - task_group_context& my_execution_context; -}; // class feeder_impl - -/** Execute computation under one element of the range - @ingroup algorithms **/ -template<typename Iterator, typename Body, typename Item> -struct for_each_iteration_task: public task { - using feeder_type = feeder_impl<Body, Item>; - - for_each_iteration_task(Iterator input_item_ptr, const Body& body, feeder_impl<Body, Item>* feeder_ptr, wait_context& wait_context) : - item_ptr(input_item_ptr), my_body(body), my_feeder_ptr(feeder_ptr), parent_wait_context(wait_context) - {} - - void finalize() { - parent_wait_context.release(); - } - - task* execute(execution_data&) override { - parallel_for_each_operator_selector<Body>::call(my_body, *item_ptr, my_feeder_ptr); - finalize(); - return nullptr; - } - - task* cancel(execution_data&) override { - finalize(); - return nullptr; - } - - Iterator item_ptr; - const Body& my_body; - feeder_impl<Body, Item>* my_feeder_ptr; - wait_context& parent_wait_context; -}; // class for_each_iteration_task - -// Helper to get the type of the iterator to the internal sequence of copies -// If the element can be passed to the body as an rvalue - this iterator should be move_iterator -template <typename Body, typename Item, typename = void> -struct input_iteration_task_iterator_helper { - // For input iterators we pass const lvalue reference to the body - // It is prohibited to take non-constant lvalue references for input iterators - using type = const Item*; -}; - -template <typename Body, typename Item> -struct input_iteration_task_iterator_helper<Body, Item, - tbb::detail::void_t<decltype(parallel_for_each_operator_selector<Body>::call(std::declval<const Body&>(), - std::declval<Item&&>(), - std::declval<feeder_impl<Body, Item>*>()))>> -{ - using type = std::move_iterator<Item*>; -}; - -/** Split one block task to several(max_block_size) iteration tasks for input iterators - @ingroup algorithms **/ -template <typename Body, typename Item> -struct input_block_handling_task : public task { - static constexpr size_t max_block_size = 4; - - using feeder_type = feeder_impl<Body, Item>; - using iteration_task_iterator_type = typename input_iteration_task_iterator_helper<Body, 
Item>::type; - using iteration_task = for_each_iteration_task<iteration_task_iterator_type, Body, Item>; - - input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, - const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc) - :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), - my_execution_context(e_context), my_allocator(alloc) - { - auto item_it = block_iteration_space.begin(); - for (auto* it = task_pool.begin(); it != task_pool.end(); ++it) { - new (it) iteration_task(iteration_task_iterator_type(item_it++), body, feeder_ptr, my_wait_context); - } - } - - void finalize(const execution_data& ed) { - my_root_wait_context.release(); - my_allocator.delete_object(this, ed); - } - - task* execute(execution_data& ed) override { - __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); - for (std::size_t counter = 1; counter < my_size; ++counter) { - my_wait_context.reserve(); - spawn(*(task_pool.begin() + counter), my_execution_context); - } - my_wait_context.reserve(); - execute_and_wait(*task_pool.begin(), my_execution_context, - my_wait_context, my_execution_context); - - // deallocate current task after children execution - finalize(ed); - return nullptr; - } - - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } - - ~input_block_handling_task() { - for(std::size_t counter = 0; counter < max_block_size; ++counter) { - (task_pool.begin() + counter)->~iteration_task(); - (block_iteration_space.begin() + counter)->~Item(); - } - } - - aligned_space<Item, max_block_size> block_iteration_space; - aligned_space<iteration_task, max_block_size> task_pool; - std::size_t my_size; - wait_context my_wait_context; - wait_context& my_root_wait_context; - task_group_context& my_execution_context; - small_object_allocator my_allocator; -}; // class input_block_handling_task - -/** Split one block task to several(max_block_size) iteration tasks for forward iterators - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item> -struct forward_block_handling_task : public task { - static constexpr size_t max_block_size = 4; - - using iteration_task = for_each_iteration_task<Iterator, Body, Item>; - - forward_block_handling_task(Iterator first, std::size_t size, - wait_context& w_context, task_group_context& e_context, - const Body& body, feeder_impl<Body, Item>* feeder_ptr, - small_object_allocator& alloc) - : my_size(size), my_wait_context(0), my_root_wait_context(w_context), - my_execution_context(e_context), my_allocator(alloc) - { - auto* task_it = task_pool.begin(); - for (std::size_t i = 0; i < size; i++) { - new (task_it++) iteration_task(first, body, feeder_ptr, my_wait_context); - ++first; - } - } - - void finalize(const execution_data& ed) { - my_root_wait_context.release(); - my_allocator.delete_object(this, ed); - } - - task* execute(execution_data& ed) override { - __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); - for(std::size_t counter = 1; counter < my_size; ++counter) { - my_wait_context.reserve(); - spawn(*(task_pool.begin() + counter), my_execution_context); - } - my_wait_context.reserve(); - execute_and_wait(*task_pool.begin(), my_execution_context, - my_wait_context, my_execution_context); - - // deallocate current task after children execution - finalize(ed); - return nullptr; - } - - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } - - ~forward_block_handling_task() { - for(std::size_t 
counter = 0; counter < my_size; ++counter) { - (task_pool.begin() + counter)->~iteration_task(); - } - } - - aligned_space<iteration_task, max_block_size> task_pool; - std::size_t my_size; - wait_context my_wait_context; - wait_context& my_root_wait_context; - task_group_context& my_execution_context; - small_object_allocator my_allocator; -}; // class forward_block_handling_task - -/** Body for parallel_for algorithm. - * Allows to redirect operations under random access iterators range to the parallel_for algorithm. - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item> -class parallel_for_body_wrapper { - Iterator my_first; - const Body& my_body; - feeder_impl<Body, Item>* my_feeder_ptr; -public: - parallel_for_body_wrapper(Iterator first, const Body& body, feeder_impl<Body, Item>* feeder_ptr) - : my_first(first), my_body(body), my_feeder_ptr(feeder_ptr) {} - - void operator()(tbb::blocked_range<std::size_t> range) const { -#if __INTEL_COMPILER -#pragma ivdep -#endif - for (std::size_t count = range.begin(); count != range.end(); count++) { - parallel_for_each_operator_selector<Body>::call(my_body, *(my_first + count), - my_feeder_ptr); - } - } -}; // class parallel_for_body_wrapper - - -/** Helper for getting iterators tag including inherited custom tags - @ingroup algorithms */ -template<typename It> -using tag = typename std::iterator_traits<It>::iterator_category; - -template<typename It> -using iterator_tag_dispatch = typename - std::conditional< - std::is_base_of<std::random_access_iterator_tag, tag<It>>::value, - std::random_access_iterator_tag, - typename std::conditional< - std::is_base_of<std::forward_iterator_tag, tag<It>>::value, - std::forward_iterator_tag, - std::input_iterator_tag - >::type - >::type; - -template <typename Body, typename Iterator, typename Item> -using feeder_is_required = tbb::detail::void_t<decltype(std::declval<const Body>()(std::declval<typename std::iterator_traits<Iterator>::reference>(), - std::declval<feeder<Item>&>()))>; - -// Creates feeder object only if the body can accept it -template <typename Iterator, typename Body, typename Item, typename = void> -struct feeder_holder { - feeder_holder( wait_context&, task_group_context&, const Body& ) {} - - feeder_impl<Body, Item>* feeder_ptr() { return nullptr; } -}; // class feeder_holder - -template <typename Iterator, typename Body, typename Item> -class feeder_holder<Iterator, Body, Item, feeder_is_required<Body, Iterator, Item>> { -public: - feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) - : my_feeder(body, w_context, context) {} - - feeder_impl<Body, Item>* feeder_ptr() { return &my_feeder; } -private: - feeder_impl<Body, Item> my_feeder; -}; // class feeder_holder - -template <typename Iterator, typename Body, typename Item> -class for_each_root_task_base : public task { -public: - for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) - : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), - my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) - { - my_wait_context.reserve(); - } -private: - task* cancel(execution_data&) override { - this->my_wait_context.release(); - return nullptr; - } -protected: - Iterator my_first; - Iterator my_last; - wait_context& my_wait_context; - task_group_context& my_execution_context; - const Body& my_body; - feeder_holder<Iterator, Body, Item> 
my_feeder_holder; -}; // class for_each_root_task_base - -/** parallel_for_each algorithm root task - most generic version - * Splits input range to blocks - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item, typename IteratorTag = iterator_tag_dispatch<Iterator>> -class for_each_root_task : public for_each_root_task_base<Iterator, Body, Item> -{ - using base_type = for_each_root_task_base<Iterator, Body, Item>; -public: - using base_type::base_type; -private: - task* execute(execution_data& ed) override { - using block_handling_type = input_block_handling_task<Body, Item>; - - if (this->my_first == this->my_last) { - this->my_wait_context.release(); - return nullptr; - } - - this->my_wait_context.reserve(); - small_object_allocator alloc{}; - auto block_handling_task = alloc.new_object<block_handling_type>(ed, this->my_wait_context, this->my_execution_context, - this->my_body, this->my_feeder_holder.feeder_ptr(), - alloc); - - auto* block_iterator = block_handling_task->block_iteration_space.begin(); - for (; !(this->my_first == this->my_last) && block_handling_task->my_size < block_handling_type::max_block_size; ++this->my_first) { - // Move semantics are automatically used when supported by the iterator - new (block_iterator++) Item(*this->my_first); - ++block_handling_task->my_size; - } - - // Do not access this after spawn to avoid races - spawn(*this, this->my_execution_context); - return block_handling_task; - } -}; // class for_each_root_task - most generic implementation - -/** parallel_for_each algorithm root task - forward iterator based specialization - * Splits input range to blocks - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item> -class for_each_root_task<Iterator, Body, Item, std::forward_iterator_tag> - : public for_each_root_task_base<Iterator, Body, Item> -{ - using base_type = for_each_root_task_base<Iterator, Body, Item>; -public: - using base_type::base_type; -private: - task* execute(execution_data& ed) override { - using block_handling_type = forward_block_handling_task<Iterator, Body, Item>; - if (this->my_first == this->my_last) { - this->my_wait_context.release(); - return nullptr; - } - - std::size_t block_size{0}; - Iterator first_block_element = this->my_first; - for (; !(this->my_first == this->my_last) && block_size < block_handling_type::max_block_size; ++this->my_first) { - ++block_size; - } - - this->my_wait_context.reserve(); - small_object_allocator alloc{}; - auto block_handling_task = alloc.new_object<block_handling_type>(ed, first_block_element, block_size, - this->my_wait_context, this->my_execution_context, - this->my_body, this->my_feeder_holder.feeder_ptr(), alloc); - - // Do not access this after spawn to avoid races - spawn(*this, this->my_execution_context); - return block_handling_task; - } -}; // class for_each_root_task - forward iterator based specialization - -/** parallel_for_each algorithm root task - random access iterator based specialization - * Splits input range to blocks - @ingroup algorithms **/ -template <typename Iterator, typename Body, typename Item> -class for_each_root_task<Iterator, Body, Item, std::random_access_iterator_tag> - : public for_each_root_task_base<Iterator, Body, Item> -{ - using base_type = for_each_root_task_base<Iterator, Body, Item>; -public: - using base_type::base_type; -private: - task* execute(execution_data&) override { - tbb::parallel_for( - tbb::blocked_range<std::size_t>(0, std::distance(this->my_first, this->my_last)), - 
parallel_for_body_wrapper<Iterator, Body, Item>(this->my_first, this->my_body, this->my_feeder_holder.feeder_ptr()) - , this->my_execution_context - ); - - this->my_wait_context.release(); - return nullptr; - } -}; // class for_each_root_task - random access iterator based specialization - -/** Helper for getting item type. If item type can be deduced from feeder - got it from feeder, - if feeder is generic - got item type from range. - @ingroup algorithms */ -template<typename Body, typename Item, typename FeederArg> -auto feeder_argument_parser(void (Body::*)(Item, feeder<FeederArg>&) const) -> FeederArg; - -template<typename Body, typename> -decltype(feeder_argument_parser<Body>(&Body::operator())) get_item_type_impl(int); // for (T, feeder<T>) -template<typename Body, typename Item> Item get_item_type_impl(...); // stub - -template <typename Body, typename Item> -using get_item_type = decltype(get_item_type_impl<Body, Item>(0)); - -/** Implements parallel iteration over a range. - @ingroup algorithms */ -template<typename Iterator, typename Body> -void run_parallel_for_each( Iterator first, Iterator last, const Body& body, task_group_context& context) -{ - if (!(first == last)) { - using ItemType = get_item_type<Body, typename std::iterator_traits<Iterator>::value_type>; - wait_context w_context(0); - - for_each_root_task<Iterator, Body, ItemType> root_task(first, last, body, w_context, context); - - execute_and_wait(root_task, context, w_context, context); - } -} - -/** \page parallel_for_each_body_req Requirements on parallel_for_each body - Class \c Body implementing the concept of parallel_for_each body must define: - - \code - B::operator()( - cv_item_type item, - feeder<item_type>& feeder - ) const - - OR - - B::operator()( cv_item_type& item ) const - \endcode Process item. - May be invoked concurrently for the same \c this but different \c item. - - - \code item_type( const item_type& ) \endcode - Copy a work item. - - \code ~item_type() \endcode Destroy a work item -**/ - -/** \name parallel_for_each - See also requirements on \ref parallel_for_each_body_req "parallel_for_each Body". **/ -//@{ -//! Parallel iteration over a range, with optional addition of more work. -/** @ingroup algorithms */ -template<typename Iterator, typename Body> -void parallel_for_each(Iterator first, Iterator last, const Body& body) { - task_group_context context(PARALLEL_FOR_EACH); - run_parallel_for_each<Iterator, Body>(first, last, body, context); -} - -template<typename Range, typename Body> -void parallel_for_each(Range& rng, const Body& body) { - parallel_for_each(std::begin(rng), std::end(rng), body); -} - -template<typename Range, typename Body> -void parallel_for_each(const Range& rng, const Body& body) { - parallel_for_each(std::begin(rng), std::end(rng), body); -} - -//! 
Parallel iteration over a range, with optional addition of more work and user-supplied context -/** @ingroup algorithms */ -template<typename Iterator, typename Body> -void parallel_for_each(Iterator first, Iterator last, const Body& body, task_group_context& context) { - run_parallel_for_each<Iterator, Body>(first, last, body, context); -} - -template<typename Range, typename Body> -void parallel_for_each(Range& rng, const Body& body, task_group_context& context) { - parallel_for_each(std::begin(rng), std::end(rng), body, context); -} - -template<typename Range, typename Body> -void parallel_for_each(const Range& rng, const Body& body, task_group_context& context) { - parallel_for_each(std::begin(rng), std::end(rng), body, context); -} - -} // namespace d2 -} // namespace detail -//! @endcond -//@} - -inline namespace v1 { -using detail::d2::parallel_for_each; -using detail::d1::feeder; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_parallel_for_each_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_for_each_H +#define __TBB_parallel_for_each_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#include "parallel_for.h" +#include "task_group.h" // task_group_context + +#include <iterator> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d2 { +template<typename Body, typename Item> class feeder_impl; +} // namespace d2 + +namespace d1 { +//! Class the user supplied algorithm body uses to add new tasks +template<typename Item> +class feeder { + feeder() {} + feeder(const feeder&) = delete; + void operator=( const feeder&) = delete; + + virtual ~feeder () {} + virtual void internal_add_copy(const Item& item) = 0; + virtual void internal_add_move(Item&& item) = 0; + + template<typename Body_, typename Item_> friend class detail::d2::feeder_impl; +public: + //! Add a work item to a running parallel_for_each. + void add(const Item& item) {internal_add_copy(item);} + void add(Item&& item) {internal_add_move(std::move(item));} +}; + +} // namespace d1 + +namespace d2 { +using namespace tbb::detail::d1; +/** Selects one of the two possible forms of function call member operator. 
+ @ingroup algorithms **/ +template<class Body> +struct parallel_for_each_operator_selector { +public: + template<typename ItemArg, typename FeederArg> + static auto call(const Body& body, ItemArg&& item, FeederArg*) + -> decltype(body(std::forward<ItemArg>(item)), void()) { + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Suppression of Microsoft non-standard extension warnings + #pragma warning (push) + #pragma warning (disable: 4239) + #endif + + body(std::forward<ItemArg>(item)); + + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (push) + #endif + } + + template<typename ItemArg, typename FeederArg> + static auto call(const Body& body, ItemArg&& item, FeederArg* feeder) + -> decltype(body(std::forward<ItemArg>(item), *feeder), void()) { + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Suppression of Microsoft non-standard extension warnings + #pragma warning (push) + #pragma warning (disable: 4239) + #endif + __TBB_ASSERT(feeder, "Feeder was not created but should be"); + + body(std::forward<ItemArg>(item), *feeder); + + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (push) + #endif + } +}; + +template<typename Body, typename Item> +struct feeder_item_task: public task { + using feeder_type = feeder_impl<Body, Item>; + + template <typename ItemType> + feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : + item(std::forward<ItemType>(input_item)), + my_feeder(feeder), + my_allocator(alloc) + {} + + void finalize(const execution_data& ed) { + my_feeder.my_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + //! Hack for resolve ambiguity between calls to the body with and without moving the stored copy + //! Executing body with moving the copy should have higher priority + using first_priority = int; + using second_priority = double; + + template <typename BodyType, typename ItemType, typename FeederType> + static auto call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, first_priority) + -> decltype(parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder), void()) + { + parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder); + } + + template <typename BodyType, typename ItemType, typename FeederType> + static void call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, second_priority) { + parallel_for_each_operator_selector<Body>::call(call_body, call_item, &call_feeder); + } + + task* execute(execution_data& ed) override { + call(my_feeder.my_body, item, my_feeder, first_priority{}); + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + Item item; + feeder_type& my_feeder; + small_object_allocator my_allocator; +}; // class feeder_item_task + +/** Implements new task adding procedure. 
+ @ingroup algorithms **/ +template<typename Body, typename Item> +class feeder_impl : public feeder<Item> { + // Avoiding use of copy constructor in a virtual method if the type does not support it + void internal_add_copy_impl(std::true_type, const Item& item) { + using feeder_task = feeder_item_task<Body, Item>; + small_object_allocator alloc; + auto task = alloc.new_object<feeder_task>(item, *this, alloc); + + my_wait_context.reserve(); + spawn(*task, my_execution_context); + } + + void internal_add_copy_impl(std::false_type, const Item&) { + __TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object"); + } + + void internal_add_copy(const Item& item) override { + internal_add_copy_impl(typename std::is_copy_constructible<Item>::type(), item); + } + + void internal_add_move(Item&& item) override { + using feeder_task = feeder_item_task<Body, Item>; + small_object_allocator alloc{}; + auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc); + + my_wait_context.reserve(); + spawn(*task, my_execution_context); + } +public: + feeder_impl(const Body& body, wait_context& w_context, task_group_context &context) + : my_body(body), + my_wait_context(w_context) + , my_execution_context(context) + {} + + const Body& my_body; + wait_context& my_wait_context; + task_group_context& my_execution_context; +}; // class feeder_impl + +/** Execute computation under one element of the range + @ingroup algorithms **/ +template<typename Iterator, typename Body, typename Item> +struct for_each_iteration_task: public task { + using feeder_type = feeder_impl<Body, Item>; + + for_each_iteration_task(Iterator input_item_ptr, const Body& body, feeder_impl<Body, Item>* feeder_ptr, wait_context& wait_context) : + item_ptr(input_item_ptr), my_body(body), my_feeder_ptr(feeder_ptr), parent_wait_context(wait_context) + {} + + void finalize() { + parent_wait_context.release(); + } + + task* execute(execution_data&) override { + parallel_for_each_operator_selector<Body>::call(my_body, *item_ptr, my_feeder_ptr); + finalize(); + return nullptr; + } + + task* cancel(execution_data&) override { + finalize(); + return nullptr; + } + + Iterator item_ptr; + const Body& my_body; + feeder_impl<Body, Item>* my_feeder_ptr; + wait_context& parent_wait_context; +}; // class for_each_iteration_task + +// Helper to get the type of the iterator to the internal sequence of copies +// If the element can be passed to the body as an rvalue - this iterator should be move_iterator +template <typename Body, typename Item, typename = void> +struct input_iteration_task_iterator_helper { + // For input iterators we pass const lvalue reference to the body + // It is prohibited to take non-constant lvalue references for input iterators + using type = const Item*; +}; + +template <typename Body, typename Item> +struct input_iteration_task_iterator_helper<Body, Item, + tbb::detail::void_t<decltype(parallel_for_each_operator_selector<Body>::call(std::declval<const Body&>(), + std::declval<Item&&>(), + std::declval<feeder_impl<Body, Item>*>()))>> +{ + using type = std::move_iterator<Item*>; +}; + +/** Split one block task to several(max_block_size) iteration tasks for input iterators + @ingroup algorithms **/ +template <typename Body, typename Item> +struct input_block_handling_task : public task { + static constexpr size_t max_block_size = 4; + + using feeder_type = feeder_impl<Body, Item>; + using iteration_task_iterator_type = typename input_iteration_task_iterator_helper<Body, 
Item>::type; + using iteration_task = for_each_iteration_task<iteration_task_iterator_type, Body, Item>; + + input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, + const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc) + :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), + my_execution_context(e_context), my_allocator(alloc) + { + auto item_it = block_iteration_space.begin(); + for (auto* it = task_pool.begin(); it != task_pool.end(); ++it) { + new (it) iteration_task(iteration_task_iterator_type(item_it++), body, feeder_ptr, my_wait_context); + } + } + + void finalize(const execution_data& ed) { + my_root_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + task* execute(execution_data& ed) override { + __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); + for (std::size_t counter = 1; counter < my_size; ++counter) { + my_wait_context.reserve(); + spawn(*(task_pool.begin() + counter), my_execution_context); + } + my_wait_context.reserve(); + execute_and_wait(*task_pool.begin(), my_execution_context, + my_wait_context, my_execution_context); + + // deallocate current task after children execution + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + ~input_block_handling_task() { + for(std::size_t counter = 0; counter < max_block_size; ++counter) { + (task_pool.begin() + counter)->~iteration_task(); + (block_iteration_space.begin() + counter)->~Item(); + } + } + + aligned_space<Item, max_block_size> block_iteration_space; + aligned_space<iteration_task, max_block_size> task_pool; + std::size_t my_size; + wait_context my_wait_context; + wait_context& my_root_wait_context; + task_group_context& my_execution_context; + small_object_allocator my_allocator; +}; // class input_block_handling_task + +/** Split one block task to several(max_block_size) iteration tasks for forward iterators + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +struct forward_block_handling_task : public task { + static constexpr size_t max_block_size = 4; + + using iteration_task = for_each_iteration_task<Iterator, Body, Item>; + + forward_block_handling_task(Iterator first, std::size_t size, + wait_context& w_context, task_group_context& e_context, + const Body& body, feeder_impl<Body, Item>* feeder_ptr, + small_object_allocator& alloc) + : my_size(size), my_wait_context(0), my_root_wait_context(w_context), + my_execution_context(e_context), my_allocator(alloc) + { + auto* task_it = task_pool.begin(); + for (std::size_t i = 0; i < size; i++) { + new (task_it++) iteration_task(first, body, feeder_ptr, my_wait_context); + ++first; + } + } + + void finalize(const execution_data& ed) { + my_root_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + task* execute(execution_data& ed) override { + __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); + for(std::size_t counter = 1; counter < my_size; ++counter) { + my_wait_context.reserve(); + spawn(*(task_pool.begin() + counter), my_execution_context); + } + my_wait_context.reserve(); + execute_and_wait(*task_pool.begin(), my_execution_context, + my_wait_context, my_execution_context); + + // deallocate current task after children execution + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + ~forward_block_handling_task() { + for(std::size_t 
counter = 0; counter < my_size; ++counter) { + (task_pool.begin() + counter)->~iteration_task(); + } + } + + aligned_space<iteration_task, max_block_size> task_pool; + std::size_t my_size; + wait_context my_wait_context; + wait_context& my_root_wait_context; + task_group_context& my_execution_context; + small_object_allocator my_allocator; +}; // class forward_block_handling_task + +/** Body for parallel_for algorithm. + * Allows to redirect operations under random access iterators range to the parallel_for algorithm. + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class parallel_for_body_wrapper { + Iterator my_first; + const Body& my_body; + feeder_impl<Body, Item>* my_feeder_ptr; +public: + parallel_for_body_wrapper(Iterator first, const Body& body, feeder_impl<Body, Item>* feeder_ptr) + : my_first(first), my_body(body), my_feeder_ptr(feeder_ptr) {} + + void operator()(tbb::blocked_range<std::size_t> range) const { +#if __INTEL_COMPILER +#pragma ivdep +#endif + for (std::size_t count = range.begin(); count != range.end(); count++) { + parallel_for_each_operator_selector<Body>::call(my_body, *(my_first + count), + my_feeder_ptr); + } + } +}; // class parallel_for_body_wrapper + + +/** Helper for getting iterators tag including inherited custom tags + @ingroup algorithms */ +template<typename It> +using tag = typename std::iterator_traits<It>::iterator_category; + +template<typename It> +using iterator_tag_dispatch = typename + std::conditional< + std::is_base_of<std::random_access_iterator_tag, tag<It>>::value, + std::random_access_iterator_tag, + typename std::conditional< + std::is_base_of<std::forward_iterator_tag, tag<It>>::value, + std::forward_iterator_tag, + std::input_iterator_tag + >::type + >::type; + +template <typename Body, typename Iterator, typename Item> +using feeder_is_required = tbb::detail::void_t<decltype(std::declval<const Body>()(std::declval<typename std::iterator_traits<Iterator>::reference>(), + std::declval<feeder<Item>&>()))>; + +// Creates feeder object only if the body can accept it +template <typename Iterator, typename Body, typename Item, typename = void> +struct feeder_holder { + feeder_holder( wait_context&, task_group_context&, const Body& ) {} + + feeder_impl<Body, Item>* feeder_ptr() { return nullptr; } +}; // class feeder_holder + +template <typename Iterator, typename Body, typename Item> +class feeder_holder<Iterator, Body, Item, feeder_is_required<Body, Iterator, Item>> { +public: + feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) + : my_feeder(body, w_context, context) {} + + feeder_impl<Body, Item>* feeder_ptr() { return &my_feeder; } +private: + feeder_impl<Body, Item> my_feeder; +}; // class feeder_holder + +template <typename Iterator, typename Body, typename Item> +class for_each_root_task_base : public task { +public: + for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) + : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), + my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) + { + my_wait_context.reserve(); + } +private: + task* cancel(execution_data&) override { + this->my_wait_context.release(); + return nullptr; + } +protected: + Iterator my_first; + Iterator my_last; + wait_context& my_wait_context; + task_group_context& my_execution_context; + const Body& my_body; + feeder_holder<Iterator, Body, Item> 
my_feeder_holder; +}; // class for_each_root_task_base + +/** parallel_for_each algorithm root task - most generic version + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item, typename IteratorTag = iterator_tag_dispatch<Iterator>> +class for_each_root_task : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data& ed) override { + using block_handling_type = input_block_handling_task<Body, Item>; + + if (this->my_first == this->my_last) { + this->my_wait_context.release(); + return nullptr; + } + + this->my_wait_context.reserve(); + small_object_allocator alloc{}; + auto block_handling_task = alloc.new_object<block_handling_type>(ed, this->my_wait_context, this->my_execution_context, + this->my_body, this->my_feeder_holder.feeder_ptr(), + alloc); + + auto* block_iterator = block_handling_task->block_iteration_space.begin(); + for (; !(this->my_first == this->my_last) && block_handling_task->my_size < block_handling_type::max_block_size; ++this->my_first) { + // Move semantics are automatically used when supported by the iterator + new (block_iterator++) Item(*this->my_first); + ++block_handling_task->my_size; + } + + // Do not access this after spawn to avoid races + spawn(*this, this->my_execution_context); + return block_handling_task; + } +}; // class for_each_root_task - most generic implementation + +/** parallel_for_each algorithm root task - forward iterator based specialization + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class for_each_root_task<Iterator, Body, Item, std::forward_iterator_tag> + : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data& ed) override { + using block_handling_type = forward_block_handling_task<Iterator, Body, Item>; + if (this->my_first == this->my_last) { + this->my_wait_context.release(); + return nullptr; + } + + std::size_t block_size{0}; + Iterator first_block_element = this->my_first; + for (; !(this->my_first == this->my_last) && block_size < block_handling_type::max_block_size; ++this->my_first) { + ++block_size; + } + + this->my_wait_context.reserve(); + small_object_allocator alloc{}; + auto block_handling_task = alloc.new_object<block_handling_type>(ed, first_block_element, block_size, + this->my_wait_context, this->my_execution_context, + this->my_body, this->my_feeder_holder.feeder_ptr(), alloc); + + // Do not access this after spawn to avoid races + spawn(*this, this->my_execution_context); + return block_handling_task; + } +}; // class for_each_root_task - forward iterator based specialization + +/** parallel_for_each algorithm root task - random access iterator based specialization + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class for_each_root_task<Iterator, Body, Item, std::random_access_iterator_tag> + : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data&) override { + tbb::parallel_for( + tbb::blocked_range<std::size_t>(0, std::distance(this->my_first, this->my_last)), + 
parallel_for_body_wrapper<Iterator, Body, Item>(this->my_first, this->my_body, this->my_feeder_holder.feeder_ptr()) + , this->my_execution_context + ); + + this->my_wait_context.release(); + return nullptr; + } +}; // class for_each_root_task - random access iterator based specialization + +/** Helper for getting item type. If item type can be deduced from feeder - got it from feeder, + if feeder is generic - got item type from range. + @ingroup algorithms */ +template<typename Body, typename Item, typename FeederArg> +auto feeder_argument_parser(void (Body::*)(Item, feeder<FeederArg>&) const) -> FeederArg; + +template<typename Body, typename> +decltype(feeder_argument_parser<Body>(&Body::operator())) get_item_type_impl(int); // for (T, feeder<T>) +template<typename Body, typename Item> Item get_item_type_impl(...); // stub + +template <typename Body, typename Item> +using get_item_type = decltype(get_item_type_impl<Body, Item>(0)); + +/** Implements parallel iteration over a range. + @ingroup algorithms */ +template<typename Iterator, typename Body> +void run_parallel_for_each( Iterator first, Iterator last, const Body& body, task_group_context& context) +{ + if (!(first == last)) { + using ItemType = get_item_type<Body, typename std::iterator_traits<Iterator>::value_type>; + wait_context w_context(0); + + for_each_root_task<Iterator, Body, ItemType> root_task(first, last, body, w_context, context); + + execute_and_wait(root_task, context, w_context, context); + } +} + +/** \page parallel_for_each_body_req Requirements on parallel_for_each body + Class \c Body implementing the concept of parallel_for_each body must define: + - \code + B::operator()( + cv_item_type item, + feeder<item_type>& feeder + ) const + + OR + + B::operator()( cv_item_type& item ) const + \endcode Process item. + May be invoked concurrently for the same \c this but different \c item. + + - \code item_type( const item_type& ) \endcode + Copy a work item. + - \code ~item_type() \endcode Destroy a work item +**/ + +/** \name parallel_for_each + See also requirements on \ref parallel_for_each_body_req "parallel_for_each Body". **/ +//@{ +//! Parallel iteration over a range, with optional addition of more work. +/** @ingroup algorithms */ +template<typename Iterator, typename Body> +void parallel_for_each(Iterator first, Iterator last, const Body& body) { + task_group_context context(PARALLEL_FOR_EACH); + run_parallel_for_each<Iterator, Body>(first, last, body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(Range& rng, const Body& body) { + parallel_for_each(std::begin(rng), std::end(rng), body); +} + +template<typename Range, typename Body> +void parallel_for_each(const Range& rng, const Body& body) { + parallel_for_each(std::begin(rng), std::end(rng), body); +} + +//! 
Parallel iteration over a range, with optional addition of more work and user-supplied context +/** @ingroup algorithms */ +template<typename Iterator, typename Body> +void parallel_for_each(Iterator first, Iterator last, const Body& body, task_group_context& context) { + run_parallel_for_each<Iterator, Body>(first, last, body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(Range& rng, const Body& body, task_group_context& context) { + parallel_for_each(std::begin(rng), std::end(rng), body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(const Range& rng, const Body& body, task_group_context& context) { + parallel_for_each(std::begin(rng), std::end(rng), body, context); +} + +} // namespace d2 +} // namespace detail +//! @endcond +//@} + +inline namespace v1 { +using detail::d2::parallel_for_each; +using detail::d1::feeder; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_for_each_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h index 6eb0f2e530..9138270b0c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h @@ -1,227 +1,227 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_invoke_H -#define __TBB_parallel_invoke_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_exception.h" -#include "detail/_task.h" -#include "detail/_template_helpers.h" -#include "detail/_small_object_pool.h" - -#include "task_group.h" - -#include <tuple> -#include <atomic> -#include <utility> - -namespace tbb { -namespace detail { -namespace d1 { - -//! Simple task object, executing user method -template<typename Function, typename WaitObject> -struct function_invoker : public task { - function_invoker(const Function& function, WaitObject& wait_ctx) : - my_function(function), - parent_wait_ctx(wait_ctx) - {} - - task* execute(execution_data& ed) override { - my_function(); - parent_wait_ctx.release(ed); - call_itt_task_notify(destroy, this); - return nullptr; - } - - task* cancel(execution_data& ed) override { - parent_wait_ctx.release(ed); - return nullptr; - } - - const Function& my_function; - WaitObject& parent_wait_ctx; -}; // struct function_invoker - -//! Task object for managing subroots in trinary task trees. -// Endowed with additional synchronization logic (compatible with wait object intefaces) to support -// continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors -// and then executes first functor by itself. But only the last executed functor must destruct and deallocate -// the subroot task. 
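// Illustrative usage sketch (not part of the original header): the public
// tbb::parallel_invoke declared further below accepts two or more callables and
// blocks until all of them have finished; groups of three functors are handed
// to an invoke_subroot_task as described above. The sum_three function and the
// sa/sb/sc variables are invented for this example.
#include "oneapi/tbb/parallel_invoke.h"
#include <numeric>
#include <vector>

inline void sum_three(const std::vector<long>& a, const std::vector<long>& b,
                      const std::vector<long>& c, long& sa, long& sb, long& sc) {
    tbb::parallel_invoke(
        [&] { sa = std::accumulate(a.begin(), a.end(), 0L); },
        [&] { sb = std::accumulate(b.begin(), b.end(), 0L); },
        [&] { sc = std::accumulate(c.begin(), c.end(), 0L); });
    // All three lambdas are guaranteed to have completed here.
}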
-template<typename F1, typename F2, typename F3> -struct invoke_subroot_task : public task { - wait_context& root_wait_ctx; - std::atomic<unsigned> ref_count{0}; - bool child_spawned = false; - - const F1& self_invoked_functor; - function_invoker<F2, invoke_subroot_task<F1, F2, F3>> f2_invoker; - function_invoker<F3, invoke_subroot_task<F1, F2, F3>> f3_invoker; - - task_group_context& my_execution_context; - small_object_allocator my_allocator; - - invoke_subroot_task(const F1& f1, const F2& f2, const F3& f3, wait_context& wait_ctx, task_group_context& context, - small_object_allocator& alloc) : - root_wait_ctx(wait_ctx), - self_invoked_functor(f1), - f2_invoker(f2, *this), - f3_invoker(f3, *this), - my_execution_context(context), - my_allocator(alloc) - { - root_wait_ctx.reserve(); - } - - void finalize(const execution_data& ed) { - root_wait_ctx.release(); - - my_allocator.delete_object(this, ed); - } - - void release(const execution_data& ed) { - __TBB_ASSERT(ref_count > 0, nullptr); - call_itt_task_notify(releasing, this); - if( --ref_count == 0 ) { - call_itt_task_notify(acquired, this); - finalize(ed); - } - } - - task* execute(execution_data& ed) override { - ref_count.fetch_add(3, std::memory_order_relaxed); - spawn(f3_invoker, my_execution_context); - spawn(f2_invoker, my_execution_context); - self_invoked_functor(); - - release(ed); - return nullptr; - } - - task* cancel(execution_data& ed) override { - if( ref_count > 0 ) { // detect children spawn - release(ed); - } else { - finalize(ed); - } - return nullptr; - } -}; // struct subroot_task - -class invoke_root_task { -public: - invoke_root_task(wait_context& wc) : my_wait_context(wc) {} - void release(const execution_data&) { - my_wait_context.release(); - } -private: - wait_context& my_wait_context; -}; - -template<typename F1> -void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1) { - root_wait_ctx.reserve(1); - invoke_root_task root(root_wait_ctx); - function_invoker<F1, invoke_root_task> invoker1(f1, root); - - execute_and_wait(invoker1, context, root_wait_ctx, context); -} - -template<typename F1, typename F2> -void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2) { - root_wait_ctx.reserve(2); - invoke_root_task root(root_wait_ctx); - function_invoker<F1, invoke_root_task> invoker1(f1, root); - function_invoker<F2, invoke_root_task> invoker2(f2, root); - - spawn(invoker1, context); - execute_and_wait(invoker2, context, root_wait_ctx, context); -} - -template<typename F1, typename F2, typename F3> -void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3) { - root_wait_ctx.reserve(3); - invoke_root_task root(root_wait_ctx); - function_invoker<F1, invoke_root_task> invoker1(f1, root); - function_invoker<F2, invoke_root_task> invoker2(f2, root); - function_invoker<F3, invoke_root_task> invoker3(f3, root); - - //TODO: implement sub root for two tasks (measure performance) - spawn(invoker1, context); - spawn(invoker2, context); - execute_and_wait(invoker3, context, root_wait_ctx, context); -} - -template<typename F1, typename F2, typename F3, typename... Fs> -void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, - const F1& f1, const F2& f2, const F3& f3, const Fs&... 
fs) { - small_object_allocator alloc{}; - auto sub_root = alloc.new_object<invoke_subroot_task<F1, F2, F3>>(f1, f2, f3, root_wait_ctx, context, alloc); - spawn(*sub_root, context); - - invoke_recursive_separation(root_wait_ctx, context, fs...); -} - -template<typename... Fs> -void parallel_invoke_impl(task_group_context& context, const Fs&... fs) { - static_assert(sizeof...(Fs) >= 2, "Parallel invoke may be called with at least two callable"); - wait_context root_wait_ctx{0}; - - invoke_recursive_separation(root_wait_ctx, context, fs...); -} - -template<typename F1, typename... Fs> -void parallel_invoke_impl(const F1& f1, const Fs&... fs) { - static_assert(sizeof...(Fs) >= 1, "Parallel invoke may be called with at least two callable"); - task_group_context context(PARALLEL_INVOKE); - wait_context root_wait_ctx{0}; - - invoke_recursive_separation(root_wait_ctx, context, fs..., f1); -} - -//! Passes last argument of variadic pack as first for handling user provided task_group_context -template <typename Tuple, typename... Fs> -struct invoke_helper; - -template <typename... Args, typename T, typename... Fs> -struct invoke_helper<std::tuple<Args...>, T, Fs...> : invoke_helper<std::tuple<Args..., T>, Fs...> {}; - -template <typename... Fs, typename T/*task_group_context or callable*/> -struct invoke_helper<std::tuple<Fs...>, T> { - void operator()(Fs&&... args, T&& t) { - parallel_invoke_impl(std::forward<T>(t), std::forward<Fs>(args)...); - } -}; - -//! Parallel execution of several function objects -// We need to pass parameter pack through forwarding reference, -// since this pack may contain task_group_context that must be passed via lvalue non-const reference -template<typename... Fs> -void parallel_invoke(Fs&&... fs) { - invoke_helper<std::tuple<>, Fs...>()(std::forward<Fs>(fs)...); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::parallel_invoke; -} // namespace v1 - -} // namespace tbb -#endif /* __TBB_parallel_invoke_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_invoke_H +#define __TBB_parallel_invoke_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_template_helpers.h" +#include "detail/_small_object_pool.h" + +#include "task_group.h" + +#include <tuple> +#include <atomic> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +//! 
Simple task object, executing user method +template<typename Function, typename WaitObject> +struct function_invoker : public task { + function_invoker(const Function& function, WaitObject& wait_ctx) : + my_function(function), + parent_wait_ctx(wait_ctx) + {} + + task* execute(execution_data& ed) override { + my_function(); + parent_wait_ctx.release(ed); + call_itt_task_notify(destroy, this); + return nullptr; + } + + task* cancel(execution_data& ed) override { + parent_wait_ctx.release(ed); + return nullptr; + } + + const Function& my_function; + WaitObject& parent_wait_ctx; +}; // struct function_invoker + +//! Task object for managing subroots in trinary task trees. +// Endowed with additional synchronization logic (compatible with wait object intefaces) to support +// continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors +// and then executes first functor by itself. But only the last executed functor must destruct and deallocate +// the subroot task. +template<typename F1, typename F2, typename F3> +struct invoke_subroot_task : public task { + wait_context& root_wait_ctx; + std::atomic<unsigned> ref_count{0}; + bool child_spawned = false; + + const F1& self_invoked_functor; + function_invoker<F2, invoke_subroot_task<F1, F2, F3>> f2_invoker; + function_invoker<F3, invoke_subroot_task<F1, F2, F3>> f3_invoker; + + task_group_context& my_execution_context; + small_object_allocator my_allocator; + + invoke_subroot_task(const F1& f1, const F2& f2, const F3& f3, wait_context& wait_ctx, task_group_context& context, + small_object_allocator& alloc) : + root_wait_ctx(wait_ctx), + self_invoked_functor(f1), + f2_invoker(f2, *this), + f3_invoker(f3, *this), + my_execution_context(context), + my_allocator(alloc) + { + root_wait_ctx.reserve(); + } + + void finalize(const execution_data& ed) { + root_wait_ctx.release(); + + my_allocator.delete_object(this, ed); + } + + void release(const execution_data& ed) { + __TBB_ASSERT(ref_count > 0, nullptr); + call_itt_task_notify(releasing, this); + if( --ref_count == 0 ) { + call_itt_task_notify(acquired, this); + finalize(ed); + } + } + + task* execute(execution_data& ed) override { + ref_count.fetch_add(3, std::memory_order_relaxed); + spawn(f3_invoker, my_execution_context); + spawn(f2_invoker, my_execution_context); + self_invoked_functor(); + + release(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + if( ref_count > 0 ) { // detect children spawn + release(ed); + } else { + finalize(ed); + } + return nullptr; + } +}; // struct subroot_task + +class invoke_root_task { +public: + invoke_root_task(wait_context& wc) : my_wait_context(wc) {} + void release(const execution_data&) { + my_wait_context.release(); + } +private: + wait_context& my_wait_context; +}; + +template<typename F1> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1) { + root_wait_ctx.reserve(1); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + + execute_and_wait(invoker1, context, root_wait_ctx, context); +} + +template<typename F1, typename F2> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2) { + root_wait_ctx.reserve(2); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + function_invoker<F2, invoke_root_task> invoker2(f2, root); + + spawn(invoker1, context); + 
execute_and_wait(invoker2, context, root_wait_ctx, context); +} + +template<typename F1, typename F2, typename F3> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3) { + root_wait_ctx.reserve(3); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + function_invoker<F2, invoke_root_task> invoker2(f2, root); + function_invoker<F3, invoke_root_task> invoker3(f3, root); + + //TODO: implement sub root for two tasks (measure performance) + spawn(invoker1, context); + spawn(invoker2, context); + execute_and_wait(invoker3, context, root_wait_ctx, context); +} + +template<typename F1, typename F2, typename F3, typename... Fs> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, + const F1& f1, const F2& f2, const F3& f3, const Fs&... fs) { + small_object_allocator alloc{}; + auto sub_root = alloc.new_object<invoke_subroot_task<F1, F2, F3>>(f1, f2, f3, root_wait_ctx, context, alloc); + spawn(*sub_root, context); + + invoke_recursive_separation(root_wait_ctx, context, fs...); +} + +template<typename... Fs> +void parallel_invoke_impl(task_group_context& context, const Fs&... fs) { + static_assert(sizeof...(Fs) >= 2, "Parallel invoke may be called with at least two callable"); + wait_context root_wait_ctx{0}; + + invoke_recursive_separation(root_wait_ctx, context, fs...); +} + +template<typename F1, typename... Fs> +void parallel_invoke_impl(const F1& f1, const Fs&... fs) { + static_assert(sizeof...(Fs) >= 1, "Parallel invoke may be called with at least two callable"); + task_group_context context(PARALLEL_INVOKE); + wait_context root_wait_ctx{0}; + + invoke_recursive_separation(root_wait_ctx, context, fs..., f1); +} + +//! Passes last argument of variadic pack as first for handling user provided task_group_context +template <typename Tuple, typename... Fs> +struct invoke_helper; + +template <typename... Args, typename T, typename... Fs> +struct invoke_helper<std::tuple<Args...>, T, Fs...> : invoke_helper<std::tuple<Args..., T>, Fs...> {}; + +template <typename... Fs, typename T/*task_group_context or callable*/> +struct invoke_helper<std::tuple<Fs...>, T> { + void operator()(Fs&&... args, T&& t) { + parallel_invoke_impl(std::forward<T>(t), std::forward<Fs>(args)...); + } +}; + +//! Parallel execution of several function objects +// We need to pass parameter pack through forwarding reference, +// since this pack may contain task_group_context that must be passed via lvalue non-const reference +template<typename... Fs> +void parallel_invoke(Fs&&... fs) { + invoke_helper<std::tuple<>, Fs...>()(std::forward<Fs>(fs)...); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_invoke; +} // namespace v1 + +} // namespace tbb +#endif /* __TBB_parallel_invoke_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h index 87a159c925..c2a21bc798 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h @@ -1,153 +1,153 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_pipeline_H -#define __TBB_parallel_pipeline_H - -#include "detail/_pipeline_filters.h" -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "task_group.h" - -#include <cstddef> -#include <atomic> -#include <type_traits> - -namespace tbb { -namespace detail { - -namespace r1 { -void __TBB_EXPORTED_FUNC parallel_pipeline(task_group_context&, std::size_t, const d1::filter_node&); -} - -namespace d1 { - -enum class filter_mode : unsigned int -{ - //! processes multiple items in parallel and in no particular order - parallel = base_filter::filter_is_out_of_order, - //! processes items one at a time; all such filters process items in the same order - serial_in_order = base_filter::filter_is_serial, - //! processes items one at a time and in no particular order - serial_out_of_order = base_filter::filter_is_serial | base_filter::filter_is_out_of_order -}; -//! Class representing a chain of type-safe pipeline filters -/** @ingroup algorithms */ -template<typename InputType, typename OutputType> -class filter { - filter_node_ptr my_root; - filter( filter_node_ptr root ) : my_root(root) {} - friend void parallel_pipeline( size_t, const filter<void,void>&, task_group_context& ); - template<typename T_, typename U_, typename Body> - friend filter<T_,U_> make_filter( filter_mode, const Body& ); - template<typename T_, typename V_, typename U_> - friend filter<T_,U_> operator&( const filter<T_,V_>&, const filter<V_,U_>& ); -public: - filter() = default; - filter( const filter& rhs ) : my_root(rhs.my_root) {} - filter( filter&& rhs ) : my_root(std::move(rhs.my_root)) {} - - void operator=(const filter& rhs) { - my_root = rhs.my_root; - } - void operator=( filter&& rhs ) { - my_root = std::move(rhs.my_root); - } - - template<typename Body> - filter( filter_mode mode, const Body& body ) : - my_root( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) - filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ) { - } - - filter& operator&=( const filter<OutputType,OutputType>& right ) { - *this = *this & right; - return *this; - } - - void clear() { - // Like operator= with filter() on right side. - my_root = nullptr; - } -}; - -//! Create a filter to participate in parallel_pipeline -/** @ingroup algorithms */ -template<typename InputType, typename OutputType, typename Body> -filter<InputType, OutputType> make_filter( filter_mode mode, const Body& body ) { - return filter_node_ptr( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) - filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ); -} - -//! Create a filter to participate in parallel_pipeline -/** @ingroup algorithms */ -template<typename Body> -filter<filter_input<Body>, filter_output<Body>> make_filter( filter_mode mode, const Body& body ) { - return make_filter<filter_input<Body>, filter_output<Body>>(mode, body); -} - -//! Composition of filters left and right. 
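// Illustrative usage sketch (not part of the original header): a three-stage
// pipeline built from make_filter and chained with the operator& defined just
// below. The 100-item limit, the token budget of 8 and the run_square_pipeline
// name are arbitrary choices for the example.
#include "oneapi/tbb/parallel_pipeline.h"
#include <cstdio>

inline void run_square_pipeline() {
    int next = 0;
    tbb::parallel_pipeline(/*max_number_of_live_tokens*/ 8,
        tbb::make_filter<void, int>(tbb::filter_mode::serial_in_order,
            [&next](tbb::flow_control& fc) -> int {
                if (next >= 100) { fc.stop(); return 0; } // signal end of input
                return next++;                            // emit the next token
            })
        & tbb::make_filter<int, int>(tbb::filter_mode::parallel,
            [](int x) { return x * x; })                  // process tokens concurrently
        & tbb::make_filter<int, void>(tbb::filter_mode::serial_in_order,
            [](int x) { std::printf("%d\n", x); }));      // consume in input order
}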
-/** @ingroup algorithms */ -template<typename T, typename V, typename U> -filter<T,U> operator&( const filter<T,V>& left, const filter<V,U>& right ) { - __TBB_ASSERT(left.my_root,"cannot use default-constructed filter as left argument of '&'"); - __TBB_ASSERT(right.my_root,"cannot use default-constructed filter as right argument of '&'"); - return filter_node_ptr( new (r1::allocate_memory(sizeof(filter_node))) filter_node(left.my_root,right.my_root) ); -} - -#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT -template<typename Body> -filter(filter_mode, Body) -->filter<filter_input<Body>, filter_output<Body>>; -#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT - -//! Parallel pipeline over chain of filters with user-supplied context. -/** @ingroup algorithms **/ -inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain, task_group_context& context) { - r1::parallel_pipeline(context, max_number_of_live_tokens, *filter_chain.my_root); -} - -//! Parallel pipeline over chain of filters. -/** @ingroup algorithms **/ -inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain) { - task_group_context context; - parallel_pipeline(max_number_of_live_tokens, filter_chain, context); -} - -//! Parallel pipeline over sequence of filters. -/** @ingroup algorithms **/ -template<typename F1, typename F2, typename... FiltersContext> -void parallel_pipeline(size_t max_number_of_live_tokens, - const F1& filter1, - const F2& filter2, - FiltersContext&&... filters) { - parallel_pipeline(max_number_of_live_tokens, filter1 & filter2, std::forward<FiltersContext>(filters)...); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 -{ -using detail::d1::parallel_pipeline; -using detail::d1::filter; -using detail::d1::make_filter; -using detail::d1::filter_mode; -using detail::d1::flow_control; -} -} // tbb - -#endif /* __TBB_parallel_pipeline_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_pipeline_H +#define __TBB_parallel_pipeline_H + +#include "detail/_pipeline_filters.h" +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "task_group.h" + +#include <cstddef> +#include <atomic> +#include <type_traits> + +namespace tbb { +namespace detail { + +namespace r1 { +void __TBB_EXPORTED_FUNC parallel_pipeline(task_group_context&, std::size_t, const d1::filter_node&); +} + +namespace d1 { + +enum class filter_mode : unsigned int +{ + //! processes multiple items in parallel and in no particular order + parallel = base_filter::filter_is_out_of_order, + //! processes items one at a time; all such filters process items in the same order + serial_in_order = base_filter::filter_is_serial, + //! processes items one at a time and in no particular order + serial_out_of_order = base_filter::filter_is_serial | base_filter::filter_is_out_of_order +}; +//! 
Class representing a chain of type-safe pipeline filters +/** @ingroup algorithms */ +template<typename InputType, typename OutputType> +class filter { + filter_node_ptr my_root; + filter( filter_node_ptr root ) : my_root(root) {} + friend void parallel_pipeline( size_t, const filter<void,void>&, task_group_context& ); + template<typename T_, typename U_, typename Body> + friend filter<T_,U_> make_filter( filter_mode, const Body& ); + template<typename T_, typename V_, typename U_> + friend filter<T_,U_> operator&( const filter<T_,V_>&, const filter<V_,U_>& ); +public: + filter() = default; + filter( const filter& rhs ) : my_root(rhs.my_root) {} + filter( filter&& rhs ) : my_root(std::move(rhs.my_root)) {} + + void operator=(const filter& rhs) { + my_root = rhs.my_root; + } + void operator=( filter&& rhs ) { + my_root = std::move(rhs.my_root); + } + + template<typename Body> + filter( filter_mode mode, const Body& body ) : + my_root( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) + filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ) { + } + + filter& operator&=( const filter<OutputType,OutputType>& right ) { + *this = *this & right; + return *this; + } + + void clear() { + // Like operator= with filter() on right side. + my_root = nullptr; + } +}; + +//! Create a filter to participate in parallel_pipeline +/** @ingroup algorithms */ +template<typename InputType, typename OutputType, typename Body> +filter<InputType, OutputType> make_filter( filter_mode mode, const Body& body ) { + return filter_node_ptr( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) + filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ); +} + +//! Create a filter to participate in parallel_pipeline +/** @ingroup algorithms */ +template<typename Body> +filter<filter_input<Body>, filter_output<Body>> make_filter( filter_mode mode, const Body& body ) { + return make_filter<filter_input<Body>, filter_output<Body>>(mode, body); +} + +//! Composition of filters left and right. +/** @ingroup algorithms */ +template<typename T, typename V, typename U> +filter<T,U> operator&( const filter<T,V>& left, const filter<V,U>& right ) { + __TBB_ASSERT(left.my_root,"cannot use default-constructed filter as left argument of '&'"); + __TBB_ASSERT(right.my_root,"cannot use default-constructed filter as right argument of '&'"); + return filter_node_ptr( new (r1::allocate_memory(sizeof(filter_node))) filter_node(left.my_root,right.my_root) ); +} + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template<typename Body> +filter(filter_mode, Body) +->filter<filter_input<Body>, filter_output<Body>>; +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +//! Parallel pipeline over chain of filters with user-supplied context. +/** @ingroup algorithms **/ +inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain, task_group_context& context) { + r1::parallel_pipeline(context, max_number_of_live_tokens, *filter_chain.my_root); +} + +//! Parallel pipeline over chain of filters. +/** @ingroup algorithms **/ +inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain) { + task_group_context context; + parallel_pipeline(max_number_of_live_tokens, filter_chain, context); +} + +//! Parallel pipeline over sequence of filters. +/** @ingroup algorithms **/ +template<typename F1, typename F2, typename... 
FiltersContext> +void parallel_pipeline(size_t max_number_of_live_tokens, + const F1& filter1, + const F2& filter2, + FiltersContext&&... filters) { + parallel_pipeline(max_number_of_live_tokens, filter1 & filter2, std::forward<FiltersContext>(filters)...); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 +{ +using detail::d1::parallel_pipeline; +using detail::d1::filter; +using detail::d1::make_filter; +using detail::d1::filter_mode; +using detail::d1::flow_control; +} +} // tbb + +#endif /* __TBB_parallel_pipeline_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h index 6db6369d68..e41cc29449 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h @@ -1,689 +1,689 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_reduce_H -#define __TBB_parallel_reduce_H - -#include <new> -#include "detail/_namespace_injection.h" -#include "detail/_task.h" -#include "detail/_aligned_space.h" -#include "detail/_small_object_pool.h" - -#include "task_group.h" // task_group_context -#include "partitioner.h" -#include "profiling.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! Tree node type for parallel_reduce. -/** @ingroup algorithms */ -//TODO: consider folding tree via bypass execution(instead of manual folding) -// for better cancellation and critical tasks handling (performance measurements required). -template<typename Body> -struct reduction_tree_node : public tree_node { - tbb::detail::aligned_space<Body> zombie_space; - Body& left_body; - bool has_right_zombie{false}; - - reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : - tree_node{parent, ref_count, alloc}, - left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ - {} - - void join(task_group_context* context) { - if (has_right_zombie && !context->is_group_execution_cancelled()) - left_body.join(*zombie_space.begin()); - } - - ~reduction_tree_node() { - if( has_right_zombie ) zombie_space.begin()->~Body(); - } -}; - -//! Task type used to split the work of parallel_reduce. -/** @ingroup algorithms */ -template<typename Range, typename Body, typename Partitioner> -struct start_reduce : public task { - Range my_range; - Body* my_body; - node* my_parent; - - typename Partitioner::task_partition_type my_partition; - small_object_allocator my_allocator; - bool is_right_child; - - task* execute(execution_data&) override; - task* cancel(execution_data&) override; - void finalize(const execution_data&); - - using tree_node_type = reduction_tree_node<Body>; - - //! Constructor reduce root task. 
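// The root task stores a pointer to the user's Body. The splitting constructors
// below make the right child share that pointer; a split copy of the Body is
// created lazily in execute(), inside the parent reduction_tree_node's
// zombie_space, only when the left child has not finished yet.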
- start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : - my_range(range), - my_body(&body), - my_partition(partitioner), - my_allocator(alloc), - is_right_child(false) {} - //! Splitting constructor used to generate children. - /** parent_ becomes left child. Newly constructed object is right child. */ - start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : - my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split_obj), - my_allocator(alloc), - is_right_child(true) - { - parent_.is_right_child = false; - } - //! Construct right child from the given range as response to the demand. - /** parent_ remains left child. Newly constructed object is right child. */ - start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : - my_range(r), - my_body(parent_.my_body), - my_partition(parent_.my_partition, split()), - my_allocator(alloc), - is_right_child(true) - { - my_partition.align_depth( d ); - parent_.is_right_child = false; - } - static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { - if ( !range.empty() ) { - wait_node wn; - small_object_allocator alloc{}; - auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc); - reduce_task->my_parent = &wn; - execute_and_wait(*reduce_task, context, wn.m_wait, context); - } - } - static void run(const Range& range, Body& body, Partitioner& partitioner) { - // Bound context prevents exceptions from body to affect nesting or sibling algorithms, - // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. - task_group_context context(PARALLEL_REDUCE); - run(range, body, partitioner, context); - } - //! Run body for range, serves as callback for partitioner - void run_body( Range &r ) { - (*my_body)(r); - } - - //! spawn right task, serves as callback for partitioner - void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { - offer_work_impl(ed, *this, split_obj); - } - //! spawn right task, serves as callback for partitioner - void offer_work(const Range& r, depth_t d, execution_data& ed) { - offer_work_impl(ed, *this, r, d); - } - -private: - template <typename... Args> - void offer_work_impl(execution_data& ed, Args&&... args) { - small_object_allocator alloc{}; - // New right child - auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc); - - // New root node as a continuation and ref count. Left and right child attach to the new parent. - right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc); - - // Spawn the right sibling - right_child->spawn_self(ed); - } - - void spawn_self(execution_data& ed) { - my_partition.spawn_task(*this, *context(ed)); - } -}; - -//! fold the tree and deallocate the task -template<typename Range, typename Body, typename Partitioner> -void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { - // Get the current parent and wait object before an object destruction - node* parent = my_parent; - auto allocator = my_allocator; - // Task execution finished - destroy it - this->~start_reduce(); - // Unwind the tree decrementing the parent`s reference count - fold_tree<tree_node_type>(parent, ed); - allocator.deallocate(this, ed); -} - -//! 
Execute parallel_reduce task -template<typename Range, typename Body, typename Partitioner> -task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { - if (!is_same_affinity(ed)) { - my_partition.note_affinity(execution_slot(ed)); - } - my_partition.check_being_stolen(*this, ed); - - // The acquire barrier synchronizes the data pointed with my_body if the left - // task has already finished. - if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { - tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); - my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); - parent_ptr->has_right_zombie = true; - } - __TBB_ASSERT(my_body != nullptr, "Incorrect body value"); - - my_partition.execute(*this, my_range, ed); - - finalize(ed); - return nullptr; -} - -//! Cancel parallel_reduce task -template<typename Range, typename Body, typename Partitioner> -task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { - finalize(ed); - return nullptr; -} - -//! Tree node type for parallel_deterministic_reduce. -/** @ingroup algorithms */ -template<typename Body> -struct deterministic_reduction_tree_node : public tree_node { - Body right_body; - Body& left_body; - - deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : - tree_node{parent, ref_count, alloc}, - right_body{input_left_body, detail::split()}, - left_body(input_left_body) - {} - - void join(task_group_context* context) { - if (!context->is_group_execution_cancelled()) - left_body.join(right_body); - } -}; - -//! Task type used to split the work of parallel_deterministic_reduce. -/** @ingroup algorithms */ -template<typename Range, typename Body, typename Partitioner> -struct start_deterministic_reduce : public task { - Range my_range; - Body& my_body; - node* my_parent; - - typename Partitioner::task_partition_type my_partition; - small_object_allocator my_allocator; - - task* execute(execution_data&) override; - task* cancel(execution_data&) override; - void finalize(const execution_data&); - - using tree_node_type = deterministic_reduction_tree_node<Body>; - - //! Constructor deterministic_reduce root task. - start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : - my_range(range), - my_body(body), - my_partition(partitioner), - my_allocator(alloc) {} - //! Splitting constructor used to generate children. - /** parent_ becomes left child. Newly constructed object is right child. 
*/ - start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, - small_object_allocator& alloc ) : - my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), - my_body(body), - my_partition(parent_.my_partition, split_obj), - my_allocator(alloc) {} - static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { - if ( !range.empty() ) { - wait_node wn; - small_object_allocator alloc{}; - auto deterministic_reduce_task = - alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc); - deterministic_reduce_task->my_parent = &wn; - execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); - } - } - static void run(const Range& range, Body& body, Partitioner& partitioner) { - // Bound context prevents exceptions from body to affect nesting or sibling algorithms, - // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce - // in the try-block. - task_group_context context(PARALLEL_REDUCE); - run(range, body, partitioner, context); - } - //! Run body for range, serves as callback for partitioner - void run_body( Range &r ) { - my_body( r ); - } - //! Spawn right task, serves as callback for partitioner - void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { - offer_work_impl(ed, *this, split_obj); - } -private: - template <typename... Args> - void offer_work_impl(execution_data& ed, Args&&... args) { - small_object_allocator alloc{}; - // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. - auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc); - - // New right child - auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc); - - right_child->my_parent = my_parent = new_tree_node; - - // Spawn the right sibling - right_child->spawn_self(ed); - } - - void spawn_self(execution_data& ed) { - my_partition.spawn_task(*this, *context(ed)); - } -}; - -//! Fold the tree and deallocate the task -template<typename Range, typename Body, typename Partitioner> -void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { - // Get the current parent and wait object before an object destruction - node* parent = my_parent; - - auto allocator = my_allocator; - // Task execution finished - destroy it - this->~start_deterministic_reduce(); - // Unwind the tree decrementing the parent`s reference count - fold_tree<tree_node_type>(parent, ed); - allocator.deallocate(this, ed); -} - -//! Execute parallel_deterministic_reduce task -template<typename Range, typename Body, typename Partitioner> -task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { - if (!is_same_affinity(ed)) { - my_partition.note_affinity(execution_slot(ed)); - } - my_partition.check_being_stolen(*this, ed); - - my_partition.execute(*this, my_range, ed); - - finalize(ed); - return NULL; -} - -//! Cancel parallel_deterministic_reduce task -template<typename Range, typename Body, typename Partitioner> -task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { - finalize(ed); - return NULL; -} - - -//! Auxiliary class for parallel_reduce; for internal use only. 
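// Illustrative usage sketch (not part of the original header): the functional
// form of parallel_reduce that lambda_reduce_body below adapts. The vector
// argument, the 0.0 identity and the plain sum are arbitrary choices; the
// parallel_sum name is invented for this example.
#include "oneapi/tbb/blocked_range.h"
#include "oneapi/tbb/parallel_reduce.h"
#include <cstddef>
#include <vector>

inline double parallel_sum(const std::vector<double>& v) {
    return tbb::parallel_reduce(
        tbb::blocked_range<std::size_t>(0, v.size()),
        0.0,                                               // identity element
        [&](const tbb::blocked_range<std::size_t>& r, double acc) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) acc += v[i];
            return acc;                                    // RealBody: reduce one subrange
        },
        [](double x, double y) { return x + y; });         // Reduction: join two partial sums
}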
-/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" - using given \ref parallel_reduce_lambda_req "anonymous function objects". - **/ -/** @ingroup algorithms */ -template<typename Range, typename Value, typename RealBody, typename Reduction> -class lambda_reduce_body { -//TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced -// (might require some performance measurements) - - const Value& my_identity_element; - const RealBody& my_real_body; - const Reduction& my_reduction; - Value my_value; - lambda_reduce_body& operator= ( const lambda_reduce_body& other ); -public: - lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) - : my_identity_element(identity) - , my_real_body(body) - , my_reduction(reduction) - , my_value(identity) - { } - lambda_reduce_body( const lambda_reduce_body& other ) = default; - lambda_reduce_body( lambda_reduce_body& other, tbb::split ) - : my_identity_element(other.my_identity_element) - , my_real_body(other.my_real_body) - , my_reduction(other.my_reduction) - , my_value(other.my_identity_element) - { } - void operator()(Range& range) { - my_value = my_real_body(range, const_cast<const Value&>(my_value)); - } - void join( lambda_reduce_body& rhs ) { - my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); - } - Value result() const { - return my_value; - } -}; - - -// Requirements on Range concept are documented in blocked_range.h - -/** \page parallel_reduce_body_req Requirements on parallel_reduce body - Class \c Body implementing the concept of parallel_reduce body must define: - - \code Body::Body( Body&, split ); \endcode Splitting constructor. - Must be able to run concurrently with operator() and method \c join - - \code Body::~Body(); \endcode Destructor - - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r - and accumulating the result - - \code void Body::join( Body& b ); \endcode Join results. - The result in \c b should be merged into the result of \c this -**/ - -/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) - TO BE DOCUMENTED -**/ - -/** \name parallel_reduce - See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ -//@{ - -//! Parallel iteration with reduction and default partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body ) { - start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() ); -} - -//! Parallel iteration with reduction and simple_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { - start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner ); -} - -//! Parallel iteration with reduction and auto_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { - start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner ); -} - -//! 
Parallel iteration with reduction and static_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { - start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner ); -} - -//! Parallel iteration with reduction and affinity_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { - start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner ); -} - -//! Parallel iteration with reduction, default partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { - start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); -} - -//! Parallel iteration with reduction, simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { - start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, auto_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { - start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, static_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { - start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context ); -} - -//! Parallel iteration with reduction, affinity_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { - start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context ); -} -/** parallel_reduce overloads that work with anonymous function objects - (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ - -//! Parallel iteration with reduction and default partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> - ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); - return body.result(); -} - -//! Parallel iteration with reduction and simple_partitioner. 
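// Illustrative usage sketch (not part of the original header): a hand-written
// Body for the imperative overloads above, following the splitting-constructor
// and join() requirements from \ref parallel_reduce_body_req. The sum_body and
// parallel_sum_with_body names are invented for this example.
#include "oneapi/tbb/blocked_range.h"
#include "oneapi/tbb/parallel_reduce.h"
#include <cstddef>
#include <vector>

struct sum_body {
    const std::vector<double>& values;
    double sum = 0.0;

    explicit sum_body(const std::vector<double>& v) : values(v) {}
    sum_body(sum_body& other, tbb::split) : values(other.values) {}  // splitting constructor

    void operator()(const tbb::blocked_range<std::size_t>& r) {
        for (std::size_t i = r.begin(); i != r.end(); ++i) sum += values[i];
    }
    void join(sum_body& rhs) { sum += rhs.sum; }  // merge the right child's partial result
};

inline double parallel_sum_with_body(const std::vector<double>& v) {
    sum_body body(v);
    tbb::parallel_reduce(tbb::blocked_range<std::size_t>(0, v.size()), body);
    return body.sum;
}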
-/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const simple_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> - ::run(range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction and auto_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const auto_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> - ::run( range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction and static_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const static_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> - ::run( range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction and affinity_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - affinity_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> - ::run( range, body, partitioner ); - return body.result(); -} - -//! Parallel iteration with reduction, default partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> - ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); - return body.result(); -} - -//! Parallel iteration with reduction, simple partitioner and user-supplied context. 
-/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const simple_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> - ::run( range, body, partitioner, context ); - return body.result(); -} - -//! Parallel iteration with reduction, auto_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const auto_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> - ::run( range, body, partitioner, context ); - return body.result(); -} - -//! Parallel iteration with reduction, static_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const static_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> - ::run( range, body, partitioner, context ); - return body.result(); -} - -//! Parallel iteration with reduction, affinity_partitioner and user-supplied context -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - affinity_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> - ::run( range, body, partitioner, context ); - return body.result(); -} - -//! Parallel iteration with deterministic reduction and default simple partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body ) { - start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner()); -} - -//! Parallel iteration with deterministic reduction and simple partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { - start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner); -} - -//! Parallel iteration with deterministic reduction and static partitioner. 
-/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { - start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); -} - -//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { - start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); -} - -//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { - start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); -} - -//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { - start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); -} - -/** parallel_reduce overloads that work with anonymous function objects - (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ - -//! Parallel iteration with deterministic reduction and default simple partitioner. -// TODO: consider making static_partitioner the default -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { - return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); -} - -//! Parallel iteration with deterministic reduction and simple partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { - lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); - start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> - ::run(range, body, partitioner); - return body.result(); -} - -//! Parallel iteration with deterministic reduction and static partitioner. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { - lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); - start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> - ::run(range, body, partitioner); - return body.result(); -} - -//! 
Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - task_group_context& context ) { - return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); -} - -//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const simple_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); - start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> - ::run(range, body, partitioner, context); - return body.result(); -} - -//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename RealBody, typename Reduction> -Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, - const static_partitioner& partitioner, task_group_context& context ) { - lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); - start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> - ::run(range, body, partitioner, context); - return body.result(); -} -//@} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::parallel_reduce; -using detail::d1::parallel_deterministic_reduce; -// Split types -using detail::split; -using detail::proportional_split; -} // namespace v1 - -} // namespace tbb -#endif /* __TBB_parallel_reduce_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_reduce_H +#define __TBB_parallel_reduce_H + +#include <new> +#include "detail/_namespace_injection.h" +#include "detail/_task.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#include "task_group.h" // task_group_context +#include "partitioner.h" +#include "profiling.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! Tree node type for parallel_reduce. +/** @ingroup algorithms */ +//TODO: consider folding tree via bypass execution(instead of manual folding) +// for better cancellation and critical tasks handling (performance measurements required). 
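The reduction_tree_node and start_reduce machinery below implements the splitting/join protocol that user code drives through the classic Body form of parallel_reduce: a Body is split-constructed for a stolen right subrange (the "right zombie" placed in zombie_space) and its partial result is later folded back via join(). A minimal Body written against that protocol might look like the following sketch (illustrative only; SumBody and the data are placeholders, not part of this commit):

#include <cstddef>
#include <vector>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>

struct SumBody {
    const std::vector<float>& data;
    float total = 0.f;

    explicit SumBody(const std::vector<float>& d) : data(d) {}
    // Splitting constructor: may run concurrently with operator() and join().
    SumBody(SumBody& other, tbb::split) : data(other.data) {}

    // Accumulate one subrange into this body's partial result.
    void operator()(const tbb::blocked_range<std::size_t>& r) {
        for (std::size_t i = r.begin(); i != r.end(); ++i) total += data[i];
    }
    // Fold the right child's partial result back into this (left) body.
    void join(SumBody& rhs) { total += rhs.total; }
};

// Usage:
//   SumBody body(values);
//   tbb::parallel_reduce(tbb::blocked_range<std::size_t>(0, values.size()), body);
//   float sum = body.total;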
+template<typename Body> +struct reduction_tree_node : public tree_node { + tbb::detail::aligned_space<Body> zombie_space; + Body& left_body; + bool has_right_zombie{false}; + + reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : + tree_node{parent, ref_count, alloc}, + left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ + {} + + void join(task_group_context* context) { + if (has_right_zombie && !context->is_group_execution_cancelled()) + left_body.join(*zombie_space.begin()); + } + + ~reduction_tree_node() { + if( has_right_zombie ) zombie_space.begin()->~Body(); + } +}; + +//! Task type used to split the work of parallel_reduce. +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_reduce : public task { + Range my_range; + Body* my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + bool is_right_child; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + using tree_node_type = reduction_tree_node<Body>; + + //! Constructor reduce root task. + start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : + my_range(range), + my_body(&body), + my_partition(partitioner), + my_allocator(alloc), + is_right_child(false) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. */ + start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc), + is_right_child(true) + { + parent_.is_right_child = false; + } + //! Construct right child from the given range as response to the demand. + /** parent_ remains left child. Newly constructed object is right child. */ + start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : + my_range(r), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split()), + my_allocator(alloc), + is_right_child(true) + { + my_partition.align_depth( d ); + parent_.is_right_child = false; + } + static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + wait_node wn; + small_object_allocator alloc{}; + auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc); + reduce_task->my_parent = &wn; + execute_and_wait(*reduce_task, context, wn.m_wait, context); + } + } + static void run(const Range& range, Body& body, Partitioner& partitioner) { + // Bound context prevents exceptions from body to affect nesting or sibling algorithms, + // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. + task_group_context context(PARALLEL_REDUCE); + run(range, body, partitioner, context); + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + (*my_body)(r); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } + //! 
spawn right task, serves as callback for partitioner + void offer_work(const Range& r, depth_t d, execution_data& ed) { + offer_work_impl(ed, *this, r, d); + } + +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... args) { + small_object_allocator alloc{}; + // New right child + auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc); + + // New root node as a continuation and ref count. Left and right child attach to the new parent. + right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc); + + // Spawn the right sibling + right_child->spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and wait object before an object destruction + node* parent = my_parent; + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_reduce(); + // Unwind the tree decrementing the parent`s reference count + fold_tree<tree_node_type>(parent, ed); + allocator.deallocate(this, ed); +} + +//! Execute parallel_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + + // The acquire barrier synchronizes the data pointed with my_body if the left + // task has already finished. + if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { + tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); + my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); + parent_ptr->has_right_zombie = true; + } + __TBB_ASSERT(my_body != nullptr, "Incorrect body value"); + + my_partition.execute(*this, my_range, ed); + + finalize(ed); + return nullptr; +} + +//! Cancel parallel_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//! Tree node type for parallel_deterministic_reduce. +/** @ingroup algorithms */ +template<typename Body> +struct deterministic_reduction_tree_node : public tree_node { + Body right_body; + Body& left_body; + + deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : + tree_node{parent, ref_count, alloc}, + right_body{input_left_body, detail::split()}, + left_body(input_left_body) + {} + + void join(task_group_context* context) { + if (!context->is_group_execution_cancelled()) + left_body.join(right_body); + } +}; + +//! Task type used to split the work of parallel_deterministic_reduce. +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_deterministic_reduce : public task { + Range my_range; + Body& my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + using tree_node_type = deterministic_reduction_tree_node<Body>; + + //! 
Constructor deterministic_reduce root task. + start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : + my_range(range), + my_body(body), + my_partition(partitioner), + my_allocator(alloc) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. */ + start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, + small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc) {} + static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + wait_node wn; + small_object_allocator alloc{}; + auto deterministic_reduce_task = + alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc); + deterministic_reduce_task->my_parent = &wn; + execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); + } + } + static void run(const Range& range, Body& body, Partitioner& partitioner) { + // Bound context prevents exceptions from body to affect nesting or sibling algorithms, + // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce + // in the try-block. + task_group_context context(PARALLEL_REDUCE); + run(range, body, partitioner, context); + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + my_body( r ); + } + //! Spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... args) { + small_object_allocator alloc{}; + // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. + auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc); + + // New right child + auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc); + + right_child->my_parent = my_parent = new_tree_node; + + // Spawn the right sibling + right_child->spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! Fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and wait object before an object destruction + node* parent = my_parent; + + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_deterministic_reduce(); + // Unwind the tree decrementing the parent`s reference count + fold_tree<tree_node_type>(parent, ed); + allocator.deallocate(this, ed); +} + +//! Execute parallel_deterministic_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + + my_partition.execute(*this, my_range, ed); + + finalize(ed); + return NULL; +} + +//! 
Cancel parallel_deterministic_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return NULL; +} + + +//! Auxiliary class for parallel_reduce; for internal use only. +/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" + using given \ref parallel_reduce_lambda_req "anonymous function objects". + **/ +/** @ingroup algorithms */ +template<typename Range, typename Value, typename RealBody, typename Reduction> +class lambda_reduce_body { +//TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced +// (might require some performance measurements) + + const Value& my_identity_element; + const RealBody& my_real_body; + const Reduction& my_reduction; + Value my_value; + lambda_reduce_body& operator= ( const lambda_reduce_body& other ); +public: + lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) + : my_identity_element(identity) + , my_real_body(body) + , my_reduction(reduction) + , my_value(identity) + { } + lambda_reduce_body( const lambda_reduce_body& other ) = default; + lambda_reduce_body( lambda_reduce_body& other, tbb::split ) + : my_identity_element(other.my_identity_element) + , my_real_body(other.my_real_body) + , my_reduction(other.my_reduction) + , my_value(other.my_identity_element) + { } + void operator()(Range& range) { + my_value = my_real_body(range, const_cast<const Value&>(my_value)); + } + void join( lambda_reduce_body& rhs ) { + my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); + } + Value result() const { + return my_value; + } +}; + + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_reduce_body_req Requirements on parallel_reduce body + Class \c Body implementing the concept of parallel_reduce body must define: + - \code Body::Body( Body&, split ); \endcode Splitting constructor. + Must be able to run concurrently with operator() and method \c join + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r + and accumulating the result + - \code void Body::join( Body& b ); \endcode Join results. + The result in \c b should be merged into the result of \c this +**/ + +/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) + TO BE DOCUMENTED +**/ + +/** \name parallel_reduce + See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ +//@{ + +//! Parallel iteration with reduction and default partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body ) { + start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() ); +} + +//! Parallel iteration with reduction and simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner ); +} + +//! 
Parallel iteration with reduction and auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { + start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction and static_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { + start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction and affinity_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { + start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction, default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { + start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); +} + +//! Parallel iteration with reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, auto_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, static_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, affinity_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context ); +} +/** parallel_reduce overloads that work with anonymous function objects + (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ + +//! Parallel iteration with reduction and default partitioner. 
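The overloads below are the functional (lambda) form: instead of a user-written Body class they take an identity value, a RealBody that folds one subrange into a running value, and a Reduction that combines two partial results; lambda_reduce_body adapts them to the Body protocol above. A usage sketch (function name and bounds are illustrative, not part of this commit):

#include <cstddef>
#include <vector>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>

double sum(const std::vector<double>& v) {
    return tbb::parallel_reduce(
        tbb::blocked_range<std::size_t>(0, v.size()),
        0.0,                                                   // identity
        [&](const tbb::blocked_range<std::size_t>& r, double running) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) running += v[i];
            return running;                                    // RealBody: fold a subrange
        },
        [](double x, double y) { return x + y; });             // Reduction: join partial results
}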
+/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> + ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); + return body.result(); +} + +//! Parallel iteration with reduction and simple_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> + ::run(range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const auto_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and static_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and affinity_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + affinity_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction, default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> + ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); + return body.result(); +} + +//! 
Parallel iteration with reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, auto_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const auto_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, static_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, affinity_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + affinity_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with deterministic reduction and default simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner()); +} + +//! Parallel iteration with deterministic reduction and simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner); +} + +//! Parallel iteration with deterministic reduction and static partitioner. 
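parallel_deterministic_reduce accepts only simple_partitioner or static_partitioner, so the split/join tree, and with it the association order of a non-associative operation such as floating-point addition, is the same on every run regardless of thread count. A sketch of the functional form (grain size and names are illustrative, not part of this commit):

#include <cstddef>
#include <vector>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>

double reproducible_sum(const std::vector<double>& v) {
    return tbb::parallel_deterministic_reduce(
        tbb::blocked_range<std::size_t>(0, v.size(), /*grainsize=*/1024),
        0.0,
        [&](const tbb::blocked_range<std::size_t>& r, double running) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) running += v[i];
            return running;
        },
        [](double x, double y) { return x + y; });  // same combining order on every run
}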
+/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { + start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); +} + +//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { + start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); +} + +//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); +} + +/** parallel_reduce overloads that work with anonymous function objects + (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ + +//! Parallel iteration with deterministic reduction and default simple partitioner. +// TODO: consider making static_partitioner the default +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { + return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); +} + +//! Parallel iteration with deterministic reduction and simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> + ::run(range, body, partitioner); + return body.result(); +} + +//! Parallel iteration with deterministic reduction and static partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> + ::run(range, body, partitioner); + return body.result(); +} + +//! 
Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + task_group_context& context ) { + return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); +} + +//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> + ::run(range, body, partitioner, context); + return body.result(); +} + +//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> + ::run(range, body, partitioner, context); + return body.result(); +} +//@} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_reduce; +using detail::d1::parallel_deterministic_reduce; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb +#endif /* __TBB_parallel_reduce_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h index d5d69ca0b2..45bf6a2352 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h @@ -1,590 +1,590 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_scan_H -#define __TBB_parallel_scan_H - -#include <functional> - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_exception.h" -#include "detail/_task.h" - -#include "profiling.h" -#include "partitioner.h" -#include "blocked_range.h" -#include "task_group.h" - -namespace tbb { -namespace detail { -namespace d1 { - -//! Used to indicate that the initial scan is being performed. 
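parallel_scan, whose header follows, computes a parallel prefix in up to two passes over each subrange: a pre_scan_tag pass that only accumulates a partial sum, and a final_scan_tag pass that replays the subrange with the correct incoming prefix and produces output. A usage sketch of the functional overload (container names are illustrative, not part of this commit):

#include <cstddef>
#include <vector>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_scan.h>

// Inclusive prefix sums of `in`, written to `out` (must already have in.size() elements).
void prefix_sums(const std::vector<int>& in, std::vector<int>& out) {
    tbb::parallel_scan(
        tbb::blocked_range<std::size_t>(0, in.size()),
        0,                                                       // identity
        [&](const tbb::blocked_range<std::size_t>& r, int sum, bool is_final_scan) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) {
                sum += in[i];
                if (is_final_scan) out[i] = sum;                 // write only on the final pass
            }
            return sum;
        },
        [](int left, int right) { return left + right; });       // reverse_join of partial sums
}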
-/** @ingroup algorithms */ -struct pre_scan_tag { - static bool is_final_scan() {return false;} - operator bool() {return is_final_scan();} -}; - -//! Used to indicate that the final scan is being performed. -/** @ingroup algorithms */ -struct final_scan_tag { - static bool is_final_scan() {return true;} - operator bool() {return is_final_scan();} -}; - -template<typename Range, typename Body> -struct sum_node; - -//! Performs final scan for a leaf -/** @ingroup algorithms */ -template<typename Range, typename Body> -struct final_sum : public task { -private: - using sum_node_type = sum_node<Range, Body>; - Body m_body; - aligned_space<Range> m_range; - //! Where to put result of last subrange, or nullptr if not last subrange. - Body* m_stuff_last; - - wait_context& m_wait_context; - sum_node_type* m_parent = nullptr; -public: - small_object_allocator m_allocator; - final_sum( Body& body, wait_context& w_o, small_object_allocator& alloc ) : - m_body(body, split()), m_wait_context(w_o), m_allocator(alloc) { - poison_pointer(m_stuff_last); - } - - final_sum( final_sum& sum, small_object_allocator& alloc ) : - m_body(sum.m_body, split()), m_wait_context(sum.m_wait_context), m_allocator(alloc) { - poison_pointer(m_stuff_last); - } - - ~final_sum() { - m_range.begin()->~Range(); - } - void finish_construction( sum_node_type* parent, const Range& range, Body* stuff_last ) { - __TBB_ASSERT( m_parent == nullptr, nullptr ); - m_parent = parent; - new( m_range.begin() ) Range(range); - m_stuff_last = stuff_last; - } -private: - sum_node_type* release_parent() { - call_itt_task_notify(releasing, m_parent); - if (m_parent) { - auto parent = m_parent; - m_parent = nullptr; - if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { - return parent; - } - } - else - m_wait_context.release(); - return nullptr; - } - sum_node_type* finalize(const execution_data& ed){ - sum_node_type* next_task = release_parent(); - m_allocator.delete_object<final_sum>(this, ed); - return next_task; - } - -public: - task* execute(execution_data& ed) override { - m_body( *m_range.begin(), final_scan_tag() ); - if( m_stuff_last ) - m_stuff_last->assign(m_body); - - return finalize(ed); - } - task* cancel(execution_data& ed) override { - return finalize(ed); - } - template<typename Tag> - void operator()( const Range& r, Tag tag ) { - m_body( r, tag ); - } - void reverse_join( final_sum& a ) { - m_body.reverse_join(a.m_body); - } - void reverse_join( Body& body ) { - m_body.reverse_join(body); - } - void assign_to( Body& body ) { - body.assign(m_body); - } - void self_destroy(const execution_data& ed) { - m_allocator.delete_object<final_sum>(this, ed); - } -}; - -//! Split work to be done in the scan. 
-/** @ingroup algorithms */ -template<typename Range, typename Body> -struct sum_node : public task { -private: - using final_sum_type = final_sum<Range,Body>; -public: - final_sum_type *m_incoming; - final_sum_type *m_body; - Body *m_stuff_last; -private: - final_sum_type *m_left_sum; - sum_node *m_left; - sum_node *m_right; - bool m_left_is_final; - Range m_range; - wait_context& m_wait_context; - sum_node* m_parent; - small_object_allocator m_allocator; -public: - std::atomic<unsigned int> ref_count{0}; - sum_node( const Range range, bool left_is_final_, sum_node* parent, wait_context& w_o, small_object_allocator& alloc ) : - m_stuff_last(nullptr), - m_left_sum(nullptr), - m_left(nullptr), - m_right(nullptr), - m_left_is_final(left_is_final_), - m_range(range), - m_wait_context(w_o), - m_parent(parent), - m_allocator(alloc) - { - if( m_parent ) - m_parent->ref_count.fetch_add(1, std::memory_order_relaxed); - // Poison fields that will be set by second pass. - poison_pointer(m_body); - poison_pointer(m_incoming); - } - - ~sum_node() { - if (m_parent) - m_parent->ref_count.fetch_sub(1, std::memory_order_relaxed); - } -private: - sum_node* release_parent() { - call_itt_task_notify(releasing, m_parent); - if (m_parent) { - auto parent = m_parent; - m_parent = nullptr; - if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { - return parent; - } - } - else - m_wait_context.release(); - return nullptr; - } - task* create_child( const Range& range, final_sum_type& body, sum_node* child, final_sum_type* incoming, Body* stuff_last ) { - if( child ) { - __TBB_ASSERT( is_poisoned(child->m_body) && is_poisoned(child->m_incoming), nullptr ); - child->prepare_for_execution(body, incoming, stuff_last); - return child; - } else { - body.finish_construction(this, range, stuff_last); - return &body; - } - } - - sum_node* finalize(const execution_data& ed) { - sum_node* next_task = release_parent(); - m_allocator.delete_object<sum_node>(this, ed); - return next_task; - } - -public: - void prepare_for_execution(final_sum_type& body, final_sum_type* incoming, Body *stuff_last) { - this->m_body = &body; - this->m_incoming = incoming; - this->m_stuff_last = stuff_last; - } - task* execute(execution_data& ed) override { - if( m_body ) { - if( m_incoming ) - m_left_sum->reverse_join( *m_incoming ); - task* right_child = this->create_child(Range(m_range,split()), *m_left_sum, m_right, m_left_sum, m_stuff_last); - task* left_child = m_left_is_final ? nullptr : this->create_child(m_range, *m_body, m_left, m_incoming, nullptr); - ref_count = (left_child != nullptr) + (right_child != nullptr); - m_body = nullptr; - if( left_child ) { - spawn(*right_child, *ed.context); - return left_child; - } else { - return right_child; - } - } else { - return finalize(ed); - } - } - task* cancel(execution_data& ed) override { - return finalize(ed); - } - void self_destroy(const execution_data& ed) { - m_allocator.delete_object<sum_node>(this, ed); - } - template<typename range,typename body,typename partitioner> - friend struct start_scan; - - template<typename range,typename body> - friend struct finish_scan; -}; - -//! 
Combine partial results -/** @ingroup algorithms */ -template<typename Range, typename Body> -struct finish_scan : public task { -private: - using sum_node_type = sum_node<Range,Body>; - using final_sum_type = final_sum<Range,Body>; - final_sum_type** const m_sum_slot; - sum_node_type*& m_return_slot; - small_object_allocator m_allocator; -public: - final_sum_type* m_right_zombie; - sum_node_type& m_result; - std::atomic<unsigned int> ref_count{2}; - finish_scan* m_parent; - wait_context& m_wait_context; - task* execute(execution_data& ed) override { - __TBB_ASSERT( m_result.ref_count.load() == static_cast<unsigned int>((m_result.m_left!=nullptr)+(m_result.m_right!=nullptr)), nullptr ); - if( m_result.m_left ) - m_result.m_left_is_final = false; - if( m_right_zombie && m_sum_slot ) - (*m_sum_slot)->reverse_join(*m_result.m_left_sum); - __TBB_ASSERT( !m_return_slot, nullptr ); - if( m_right_zombie || m_result.m_right ) { - m_return_slot = &m_result; - } else { - m_result.self_destroy(ed); - } - if( m_right_zombie && !m_sum_slot && !m_result.m_right ) { - m_right_zombie->self_destroy(ed); - m_right_zombie = nullptr; - } - return finalize(ed); - } - task* cancel(execution_data& ed) override { - return finalize(ed); - } - finish_scan(sum_node_type*& return_slot, final_sum_type** sum, sum_node_type& result_, finish_scan* parent, wait_context& w_o, small_object_allocator& alloc) : - m_sum_slot(sum), - m_return_slot(return_slot), - m_allocator(alloc), - m_right_zombie(nullptr), - m_result(result_), - m_parent(parent), - m_wait_context(w_o) - { - __TBB_ASSERT( !m_return_slot, nullptr ); - } -private: - finish_scan* release_parent() { - call_itt_task_notify(releasing, m_parent); - if (m_parent) { - auto parent = m_parent; - m_parent = nullptr; - if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { - return parent; - } - } - else - m_wait_context.release(); - return nullptr; - } - finish_scan* finalize(const execution_data& ed) { - finish_scan* next_task = release_parent(); - m_allocator.delete_object<finish_scan>(this, ed); - return next_task; - } -}; - -//! Initial task to split the work -/** @ingroup algorithms */ -template<typename Range, typename Body, typename Partitioner> -struct start_scan : public task { -private: - using sum_node_type = sum_node<Range,Body>; - using final_sum_type = final_sum<Range,Body>; - using finish_pass1_type = finish_scan<Range,Body>; - std::reference_wrapper<sum_node_type*> m_return_slot; - Range m_range; - std::reference_wrapper<final_sum_type> m_body; - typename Partitioner::partition_type m_partition; - /** Non-null if caller is requesting total. 
*/ - final_sum_type** m_sum_slot; - bool m_is_final; - bool m_is_right_child; - - finish_pass1_type* m_parent; - small_object_allocator m_allocator; - wait_context& m_wait_context; - - finish_pass1_type* release_parent() { - call_itt_task_notify(releasing, m_parent); - if (m_parent) { - auto parent = m_parent; - m_parent = nullptr; - if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { - return parent; - } - } - else - m_wait_context.release(); - return nullptr; - } - - finish_pass1_type* finalize( const execution_data& ed ) { - finish_pass1_type* next_task = release_parent(); - m_allocator.delete_object<start_scan>(this, ed); - return next_task; - } - -public: - task* execute( execution_data& ) override; - task* cancel( execution_data& ed ) override { - return finalize(ed); - } - start_scan( sum_node_type*& return_slot, start_scan& parent, small_object_allocator& alloc ) : - m_return_slot(return_slot), - m_range(parent.m_range,split()), - m_body(parent.m_body), - m_partition(parent.m_partition,split()), - m_sum_slot(parent.m_sum_slot), - m_is_final(parent.m_is_final), - m_is_right_child(true), - m_parent(parent.m_parent), - m_allocator(alloc), - m_wait_context(parent.m_wait_context) - { - __TBB_ASSERT( !m_return_slot, nullptr ); - parent.m_is_right_child = false; - } - - start_scan( sum_node_type*& return_slot, const Range& range, final_sum_type& body, const Partitioner& partitioner, wait_context& w_o, small_object_allocator& alloc ) : - m_return_slot(return_slot), - m_range(range), - m_body(body), - m_partition(partitioner), - m_sum_slot(nullptr), - m_is_final(true), - m_is_right_child(false), - m_parent(nullptr), - m_allocator(alloc), - m_wait_context(w_o) - { - __TBB_ASSERT( !m_return_slot, nullptr ); - } - - static void run( const Range& range, Body& body, const Partitioner& partitioner ) { - if( !range.empty() ) { - task_group_context context(PARALLEL_SCAN); - - using start_pass1_type = start_scan<Range,Body,Partitioner>; - sum_node_type* root = nullptr; - wait_context w_ctx{1}; - small_object_allocator alloc{}; - - auto& temp_body = *alloc.new_object<final_sum_type>(body, w_ctx, alloc); - temp_body.reverse_join(body); - - auto& pass1 = *alloc.new_object<start_pass1_type>(/*m_return_slot=*/root, range, temp_body, partitioner, w_ctx, alloc); - - execute_and_wait(pass1, context, w_ctx, context); - if( root ) { - root->prepare_for_execution(temp_body, nullptr, &body); - w_ctx.reserve(); - execute_and_wait(*root, context, w_ctx, context); - } else { - temp_body.assign_to(body); - temp_body.finish_construction(nullptr, range, nullptr); - alloc.delete_object<final_sum_type>(&temp_body); - } - } - } -}; - -template<typename Range, typename Body, typename Partitioner> -task* start_scan<Range,Body,Partitioner>::execute( execution_data& ed ) { - // Inspecting m_parent->result.left_sum would ordinarily be a race condition. - // But we inspect it only if we are not a stolen task, in which case we - // know that task assigning to m_parent->result.left_sum has completed. 
- __TBB_ASSERT(!m_is_right_child || m_parent, "right child is never an orphan"); - bool treat_as_stolen = m_is_right_child && (is_stolen(ed) || &m_body.get()!=m_parent->m_result.m_left_sum); - if( treat_as_stolen ) { - // Invocation is for right child that has been really stolen or needs to be virtually stolen - small_object_allocator alloc{}; - m_parent->m_right_zombie = alloc.new_object<final_sum_type>(m_body, alloc); - m_body = *m_parent->m_right_zombie; - m_is_final = false; - } - task* next_task = nullptr; - if( (m_is_right_child && !treat_as_stolen) || !m_range.is_divisible() || m_partition.should_execute_range(ed) ) { - if( m_is_final ) - m_body(m_range, final_scan_tag()); - else if( m_sum_slot ) - m_body(m_range, pre_scan_tag()); - if( m_sum_slot ) - *m_sum_slot = &m_body.get(); - __TBB_ASSERT( !m_return_slot, nullptr ); - - next_task = finalize(ed); - } else { - small_object_allocator alloc{}; - auto result = alloc.new_object<sum_node_type>(m_range,/*m_left_is_final=*/m_is_final, m_parent? &m_parent->m_result: nullptr, m_wait_context, alloc); - - auto new_parent = alloc.new_object<finish_pass1_type>(m_return_slot, m_sum_slot, *result, m_parent, m_wait_context, alloc); - m_parent = new_parent; - - // Split off right child - auto& right_child = *alloc.new_object<start_scan>(/*m_return_slot=*/result->m_right, *this, alloc); - - spawn(right_child, *ed.context); - - m_sum_slot = &result->m_left_sum; - m_return_slot = result->m_left; - - __TBB_ASSERT( !m_return_slot, nullptr ); - next_task = this; - } - return next_task; -} - -template<typename Range, typename Value, typename Scan, typename ReverseJoin> -class lambda_scan_body { - Value m_sum_slot; - const Value& identity_element; - const Scan& m_scan; - const ReverseJoin& m_reverse_join; -public: - void operator=(const lambda_scan_body&) = delete; - lambda_scan_body(const lambda_scan_body&) = default; - - lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join ) - : m_sum_slot(identity) - , identity_element(identity) - , m_scan(scan) - , m_reverse_join(rev_join) {} - - lambda_scan_body( lambda_scan_body& b, split ) - : m_sum_slot(b.identity_element) - , identity_element(b.identity_element) - , m_scan(b.m_scan) - , m_reverse_join(b.m_reverse_join) {} - - template<typename Tag> - void operator()( const Range& r, Tag tag ) { - m_sum_slot = m_scan(r, m_sum_slot, tag); - } - - void reverse_join( lambda_scan_body& a ) { - m_sum_slot = m_reverse_join(a.m_sum_slot, m_sum_slot); - } - - void assign( lambda_scan_body& b ) { - m_sum_slot = b.m_sum_slot; - } - - Value result() const { - return m_sum_slot; - } -}; - -// Requirements on Range concept are documented in blocked_range.h - -/** \page parallel_scan_body_req Requirements on parallel_scan body - Class \c Body implementing the concept of parallel_scan body must define: - - \code Body::Body( Body&, split ); \endcode Splitting constructor. 
- Split \c b so that \c this and \c b can accumulate separately - - \code Body::~Body(); \endcode Destructor - - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode - Preprocess iterations for range \c r - - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode - Do final processing for iterations of range \c r - - \code void Body::reverse_join( Body& a ); \endcode - Merge preprocessing state of \c a into \c this, where \c a was - created earlier from \c b by b's splitting constructor -**/ - -/** \name parallel_scan - See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/ -//@{ - -//! Parallel prefix with default partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_scan( const Range& range, Body& body ) { - start_scan<Range, Body, auto_partitioner>::run(range,body,__TBB_DEFAULT_PARTITIONER()); -} - -//! Parallel prefix with simple_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) { - start_scan<Range, Body, simple_partitioner>::run(range, body, partitioner); -} - -//! Parallel prefix with auto_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Body> -void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) { - start_scan<Range,Body,auto_partitioner>::run(range, body, partitioner); -} - -//! Parallel prefix with default partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename Scan, typename ReverseJoin> -Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) { - lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); - parallel_scan(range, body, __TBB_DEFAULT_PARTITIONER()); - return body.result(); -} - -//! Parallel prefix with simple_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename Scan, typename ReverseJoin> -Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, - const simple_partitioner& partitioner ) { - lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); - parallel_scan(range, body, partitioner); - return body.result(); -} - -//! Parallel prefix with auto_partitioner -/** @ingroup algorithms **/ -template<typename Range, typename Value, typename Scan, typename ReverseJoin> -Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, - const auto_partitioner& partitioner ) { - lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); - parallel_scan(range, body, partitioner); - return body.result(); -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::parallel_scan; - using detail::d1::pre_scan_tag; - using detail::d1::final_scan_tag; - -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_parallel_scan_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_scan_H +#define __TBB_parallel_scan_H + +#include <functional> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" + +#include "profiling.h" +#include "partitioner.h" +#include "blocked_range.h" +#include "task_group.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! Used to indicate that the initial scan is being performed. +/** @ingroup algorithms */ +struct pre_scan_tag { + static bool is_final_scan() {return false;} + operator bool() {return is_final_scan();} +}; + +//! Used to indicate that the final scan is being performed. +/** @ingroup algorithms */ +struct final_scan_tag { + static bool is_final_scan() {return true;} + operator bool() {return is_final_scan();} +}; + +template<typename Range, typename Body> +struct sum_node; + +//! Performs final scan for a leaf +/** @ingroup algorithms */ +template<typename Range, typename Body> +struct final_sum : public task { +private: + using sum_node_type = sum_node<Range, Body>; + Body m_body; + aligned_space<Range> m_range; + //! Where to put result of last subrange, or nullptr if not last subrange. + Body* m_stuff_last; + + wait_context& m_wait_context; + sum_node_type* m_parent = nullptr; +public: + small_object_allocator m_allocator; + final_sum( Body& body, wait_context& w_o, small_object_allocator& alloc ) : + m_body(body, split()), m_wait_context(w_o), m_allocator(alloc) { + poison_pointer(m_stuff_last); + } + + final_sum( final_sum& sum, small_object_allocator& alloc ) : + m_body(sum.m_body, split()), m_wait_context(sum.m_wait_context), m_allocator(alloc) { + poison_pointer(m_stuff_last); + } + + ~final_sum() { + m_range.begin()->~Range(); + } + void finish_construction( sum_node_type* parent, const Range& range, Body* stuff_last ) { + __TBB_ASSERT( m_parent == nullptr, nullptr ); + m_parent = parent; + new( m_range.begin() ) Range(range); + m_stuff_last = stuff_last; + } +private: + sum_node_type* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + sum_node_type* finalize(const execution_data& ed){ + sum_node_type* next_task = release_parent(); + m_allocator.delete_object<final_sum>(this, ed); + return next_task; + } + +public: + task* execute(execution_data& ed) override { + m_body( *m_range.begin(), final_scan_tag() ); + if( m_stuff_last ) + m_stuff_last->assign(m_body); + + return finalize(ed); + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + template<typename Tag> + void operator()( const Range& r, Tag tag ) { + m_body( r, tag ); + } + void reverse_join( final_sum& a ) { + m_body.reverse_join(a.m_body); + } + void reverse_join( Body& body ) { + m_body.reverse_join(body); + } + void assign_to( Body& body ) { + body.assign(m_body); + } + void self_destroy(const execution_data& ed) { + m_allocator.delete_object<final_sum>(this, ed); + } +}; + +//! 
Split work to be done in the scan. +/** @ingroup algorithms */ +template<typename Range, typename Body> +struct sum_node : public task { +private: + using final_sum_type = final_sum<Range,Body>; +public: + final_sum_type *m_incoming; + final_sum_type *m_body; + Body *m_stuff_last; +private: + final_sum_type *m_left_sum; + sum_node *m_left; + sum_node *m_right; + bool m_left_is_final; + Range m_range; + wait_context& m_wait_context; + sum_node* m_parent; + small_object_allocator m_allocator; +public: + std::atomic<unsigned int> ref_count{0}; + sum_node( const Range range, bool left_is_final_, sum_node* parent, wait_context& w_o, small_object_allocator& alloc ) : + m_stuff_last(nullptr), + m_left_sum(nullptr), + m_left(nullptr), + m_right(nullptr), + m_left_is_final(left_is_final_), + m_range(range), + m_wait_context(w_o), + m_parent(parent), + m_allocator(alloc) + { + if( m_parent ) + m_parent->ref_count.fetch_add(1, std::memory_order_relaxed); + // Poison fields that will be set by second pass. + poison_pointer(m_body); + poison_pointer(m_incoming); + } + + ~sum_node() { + if (m_parent) + m_parent->ref_count.fetch_sub(1, std::memory_order_relaxed); + } +private: + sum_node* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + task* create_child( const Range& range, final_sum_type& body, sum_node* child, final_sum_type* incoming, Body* stuff_last ) { + if( child ) { + __TBB_ASSERT( is_poisoned(child->m_body) && is_poisoned(child->m_incoming), nullptr ); + child->prepare_for_execution(body, incoming, stuff_last); + return child; + } else { + body.finish_construction(this, range, stuff_last); + return &body; + } + } + + sum_node* finalize(const execution_data& ed) { + sum_node* next_task = release_parent(); + m_allocator.delete_object<sum_node>(this, ed); + return next_task; + } + +public: + void prepare_for_execution(final_sum_type& body, final_sum_type* incoming, Body *stuff_last) { + this->m_body = &body; + this->m_incoming = incoming; + this->m_stuff_last = stuff_last; + } + task* execute(execution_data& ed) override { + if( m_body ) { + if( m_incoming ) + m_left_sum->reverse_join( *m_incoming ); + task* right_child = this->create_child(Range(m_range,split()), *m_left_sum, m_right, m_left_sum, m_stuff_last); + task* left_child = m_left_is_final ? nullptr : this->create_child(m_range, *m_body, m_left, m_incoming, nullptr); + ref_count = (left_child != nullptr) + (right_child != nullptr); + m_body = nullptr; + if( left_child ) { + spawn(*right_child, *ed.context); + return left_child; + } else { + return right_child; + } + } else { + return finalize(ed); + } + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + void self_destroy(const execution_data& ed) { + m_allocator.delete_object<sum_node>(this, ed); + } + template<typename range,typename body,typename partitioner> + friend struct start_scan; + + template<typename range,typename body> + friend struct finish_scan; +}; + +//! 
Combine partial results +/** @ingroup algorithms */ +template<typename Range, typename Body> +struct finish_scan : public task { +private: + using sum_node_type = sum_node<Range,Body>; + using final_sum_type = final_sum<Range,Body>; + final_sum_type** const m_sum_slot; + sum_node_type*& m_return_slot; + small_object_allocator m_allocator; +public: + final_sum_type* m_right_zombie; + sum_node_type& m_result; + std::atomic<unsigned int> ref_count{2}; + finish_scan* m_parent; + wait_context& m_wait_context; + task* execute(execution_data& ed) override { + __TBB_ASSERT( m_result.ref_count.load() == static_cast<unsigned int>((m_result.m_left!=nullptr)+(m_result.m_right!=nullptr)), nullptr ); + if( m_result.m_left ) + m_result.m_left_is_final = false; + if( m_right_zombie && m_sum_slot ) + (*m_sum_slot)->reverse_join(*m_result.m_left_sum); + __TBB_ASSERT( !m_return_slot, nullptr ); + if( m_right_zombie || m_result.m_right ) { + m_return_slot = &m_result; + } else { + m_result.self_destroy(ed); + } + if( m_right_zombie && !m_sum_slot && !m_result.m_right ) { + m_right_zombie->self_destroy(ed); + m_right_zombie = nullptr; + } + return finalize(ed); + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + finish_scan(sum_node_type*& return_slot, final_sum_type** sum, sum_node_type& result_, finish_scan* parent, wait_context& w_o, small_object_allocator& alloc) : + m_sum_slot(sum), + m_return_slot(return_slot), + m_allocator(alloc), + m_right_zombie(nullptr), + m_result(result_), + m_parent(parent), + m_wait_context(w_o) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + } +private: + finish_scan* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + finish_scan* finalize(const execution_data& ed) { + finish_scan* next_task = release_parent(); + m_allocator.delete_object<finish_scan>(this, ed); + return next_task; + } +}; + +//! Initial task to split the work +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_scan : public task { +private: + using sum_node_type = sum_node<Range,Body>; + using final_sum_type = final_sum<Range,Body>; + using finish_pass1_type = finish_scan<Range,Body>; + std::reference_wrapper<sum_node_type*> m_return_slot; + Range m_range; + std::reference_wrapper<final_sum_type> m_body; + typename Partitioner::partition_type m_partition; + /** Non-null if caller is requesting total. 
*/ + final_sum_type** m_sum_slot; + bool m_is_final; + bool m_is_right_child; + + finish_pass1_type* m_parent; + small_object_allocator m_allocator; + wait_context& m_wait_context; + + finish_pass1_type* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + + finish_pass1_type* finalize( const execution_data& ed ) { + finish_pass1_type* next_task = release_parent(); + m_allocator.delete_object<start_scan>(this, ed); + return next_task; + } + +public: + task* execute( execution_data& ) override; + task* cancel( execution_data& ed ) override { + return finalize(ed); + } + start_scan( sum_node_type*& return_slot, start_scan& parent, small_object_allocator& alloc ) : + m_return_slot(return_slot), + m_range(parent.m_range,split()), + m_body(parent.m_body), + m_partition(parent.m_partition,split()), + m_sum_slot(parent.m_sum_slot), + m_is_final(parent.m_is_final), + m_is_right_child(true), + m_parent(parent.m_parent), + m_allocator(alloc), + m_wait_context(parent.m_wait_context) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + parent.m_is_right_child = false; + } + + start_scan( sum_node_type*& return_slot, const Range& range, final_sum_type& body, const Partitioner& partitioner, wait_context& w_o, small_object_allocator& alloc ) : + m_return_slot(return_slot), + m_range(range), + m_body(body), + m_partition(partitioner), + m_sum_slot(nullptr), + m_is_final(true), + m_is_right_child(false), + m_parent(nullptr), + m_allocator(alloc), + m_wait_context(w_o) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + } + + static void run( const Range& range, Body& body, const Partitioner& partitioner ) { + if( !range.empty() ) { + task_group_context context(PARALLEL_SCAN); + + using start_pass1_type = start_scan<Range,Body,Partitioner>; + sum_node_type* root = nullptr; + wait_context w_ctx{1}; + small_object_allocator alloc{}; + + auto& temp_body = *alloc.new_object<final_sum_type>(body, w_ctx, alloc); + temp_body.reverse_join(body); + + auto& pass1 = *alloc.new_object<start_pass1_type>(/*m_return_slot=*/root, range, temp_body, partitioner, w_ctx, alloc); + + execute_and_wait(pass1, context, w_ctx, context); + if( root ) { + root->prepare_for_execution(temp_body, nullptr, &body); + w_ctx.reserve(); + execute_and_wait(*root, context, w_ctx, context); + } else { + temp_body.assign_to(body); + temp_body.finish_construction(nullptr, range, nullptr); + alloc.delete_object<final_sum_type>(&temp_body); + } + } + } +}; + +template<typename Range, typename Body, typename Partitioner> +task* start_scan<Range,Body,Partitioner>::execute( execution_data& ed ) { + // Inspecting m_parent->result.left_sum would ordinarily be a race condition. + // But we inspect it only if we are not a stolen task, in which case we + // know that task assigning to m_parent->result.left_sum has completed. 
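+    // Editorial note (not part of the original header): the check below treats a right
+    // child as "virtually stolen" when it was really stolen or when its body is no
+    // longer recorded as the parent's left sum; such a child gets a fresh final_sum
+    // copy (m_right_zombie) and downgrades from a final scan to a pre-scan.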
+ __TBB_ASSERT(!m_is_right_child || m_parent, "right child is never an orphan"); + bool treat_as_stolen = m_is_right_child && (is_stolen(ed) || &m_body.get()!=m_parent->m_result.m_left_sum); + if( treat_as_stolen ) { + // Invocation is for right child that has been really stolen or needs to be virtually stolen + small_object_allocator alloc{}; + m_parent->m_right_zombie = alloc.new_object<final_sum_type>(m_body, alloc); + m_body = *m_parent->m_right_zombie; + m_is_final = false; + } + task* next_task = nullptr; + if( (m_is_right_child && !treat_as_stolen) || !m_range.is_divisible() || m_partition.should_execute_range(ed) ) { + if( m_is_final ) + m_body(m_range, final_scan_tag()); + else if( m_sum_slot ) + m_body(m_range, pre_scan_tag()); + if( m_sum_slot ) + *m_sum_slot = &m_body.get(); + __TBB_ASSERT( !m_return_slot, nullptr ); + + next_task = finalize(ed); + } else { + small_object_allocator alloc{}; + auto result = alloc.new_object<sum_node_type>(m_range,/*m_left_is_final=*/m_is_final, m_parent? &m_parent->m_result: nullptr, m_wait_context, alloc); + + auto new_parent = alloc.new_object<finish_pass1_type>(m_return_slot, m_sum_slot, *result, m_parent, m_wait_context, alloc); + m_parent = new_parent; + + // Split off right child + auto& right_child = *alloc.new_object<start_scan>(/*m_return_slot=*/result->m_right, *this, alloc); + + spawn(right_child, *ed.context); + + m_sum_slot = &result->m_left_sum; + m_return_slot = result->m_left; + + __TBB_ASSERT( !m_return_slot, nullptr ); + next_task = this; + } + return next_task; +} + +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +class lambda_scan_body { + Value m_sum_slot; + const Value& identity_element; + const Scan& m_scan; + const ReverseJoin& m_reverse_join; +public: + void operator=(const lambda_scan_body&) = delete; + lambda_scan_body(const lambda_scan_body&) = default; + + lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join ) + : m_sum_slot(identity) + , identity_element(identity) + , m_scan(scan) + , m_reverse_join(rev_join) {} + + lambda_scan_body( lambda_scan_body& b, split ) + : m_sum_slot(b.identity_element) + , identity_element(b.identity_element) + , m_scan(b.m_scan) + , m_reverse_join(b.m_reverse_join) {} + + template<typename Tag> + void operator()( const Range& r, Tag tag ) { + m_sum_slot = m_scan(r, m_sum_slot, tag); + } + + void reverse_join( lambda_scan_body& a ) { + m_sum_slot = m_reverse_join(a.m_sum_slot, m_sum_slot); + } + + void assign( lambda_scan_body& b ) { + m_sum_slot = b.m_sum_slot; + } + + Value result() const { + return m_sum_slot; + } +}; + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_scan_body_req Requirements on parallel_scan body + Class \c Body implementing the concept of parallel_scan body must define: + - \code Body::Body( Body&, split ); \endcode Splitting constructor. 
+ Split \c b so that \c this and \c b can accumulate separately + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode + Preprocess iterations for range \c r + - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode + Do final processing for iterations of range \c r + - \code void Body::reverse_join( Body& a ); \endcode + Merge preprocessing state of \c a into \c this, where \c a was + created earlier from \c b by b's splitting constructor +**/ + +/** \name parallel_scan + See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/ +//@{ + +//! Parallel prefix with default partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body ) { + start_scan<Range, Body, auto_partitioner>::run(range,body,__TBB_DEFAULT_PARTITIONER()); +} + +//! Parallel prefix with simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_scan<Range, Body, simple_partitioner>::run(range, body, partitioner); +} + +//! Parallel prefix with auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) { + start_scan<Range,Body,auto_partitioner>::run(range, body, partitioner); +} + +//! Parallel prefix with default partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, __TBB_DEFAULT_PARTITIONER()); + return body.result(); +} + +//! Parallel prefix with simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, + const simple_partitioner& partitioner ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, partitioner); + return body.result(); +} + +//! 
Parallel prefix with auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, + const auto_partitioner& partitioner ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, partitioner); + return body.result(); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::parallel_scan; + using detail::d1::pre_scan_tag; + using detail::d1::final_scan_tag; + +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_scan_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h index 0e7be5e25b..eaaa89707a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h @@ -1,247 +1,247 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_parallel_sort_H -#define __TBB_parallel_sort_H - -#include "detail/_namespace_injection.h" -#include "parallel_for.h" -#include "blocked_range.h" -#include "profiling.h" - -#include <algorithm> -#include <iterator> -#include <functional> -#include <cstddef> - -namespace tbb { -namespace detail { -namespace d1 { - -//! Range used in quicksort to split elements into subranges based on a value. -/** The split operation selects a splitter and places all elements less than or equal - to the value in the first range and the remaining elements in the second range. - @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -class quick_sort_range { - std::size_t median_of_three( const RandomAccessIterator& array, std::size_t l, std::size_t m, std::size_t r ) const { - return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp(array[l], array[r]) ? r : l ) ) - : ( comp(array[r], array[m]) ? m : ( comp(array[r], array[l]) ? r : l ) ); - } - - std::size_t pseudo_median_of_nine( const RandomAccessIterator& array, const quick_sort_range& range ) const { - std::size_t offset = range.size / 8u; - return median_of_three(array, - median_of_three(array, 0 , offset, offset * 2), - median_of_three(array, offset * 3, offset * 4, offset * 5), - median_of_three(array, offset * 6, offset * 7, range.size - 1)); - - } - - std::size_t split_range( quick_sort_range& range ) { - RandomAccessIterator array = range.begin; - RandomAccessIterator first_element = range.begin; - std::size_t m = pseudo_median_of_nine(array, range); - if( m != 0 ) std::iter_swap(array, array + m); - - std::size_t i = 0; - std::size_t j = range.size; - // Partition interval [i + 1,j - 1] with key *first_element. - for(;;) { - __TBB_ASSERT( i < j, nullptr ); - // Loop must terminate since array[l] == *first_element. - do { - --j; - __TBB_ASSERT( i <= j, "bad ordering relation?" 
); - } while( comp(*first_element, array[j]) ); - do { - __TBB_ASSERT( i <= j, nullptr ); - if( i == j ) goto partition; - ++i; - } while( comp(array[i], *first_element) ); - if( i == j ) goto partition; - std::iter_swap(array + i, array + j); - } -partition: - // Put the partition key were it belongs - std::iter_swap(array + j, first_element); - // array[l..j) is less or equal to key. - // array(j..r) is greater or equal to key. - // array[j] is equal to key - i = j + 1; - std::size_t new_range_size = range.size - i; - range.size = j; - return new_range_size; - } - -public: - quick_sort_range() = default; - quick_sort_range( const quick_sort_range& ) = default; - void operator=( const quick_sort_range& ) = delete; - - static constexpr std::size_t grainsize = 500; - const Compare& comp; - std::size_t size; - RandomAccessIterator begin; - - quick_sort_range( RandomAccessIterator begin_, std::size_t size_, const Compare& comp_ ) : - comp(comp_), size(size_), begin(begin_) {} - - bool empty() const { return size == 0; } - bool is_divisible() const { return size >= grainsize; } - - quick_sort_range( quick_sort_range& range, split ) - : comp(range.comp) - , size(split_range(range)) - // +1 accounts for the pivot element, which is at its correct place - // already and, therefore, is not included into subranges. - , begin(range.begin + range.size + 1) {} -}; - -//! Body class used to test if elements in a range are presorted -/** @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -class quick_sort_pretest_body { - const Compare& comp; - task_group_context& context; - -public: - quick_sort_pretest_body() = default; - quick_sort_pretest_body( const quick_sort_pretest_body& ) = default; - void operator=( const quick_sort_pretest_body& ) = delete; - - quick_sort_pretest_body( const Compare& _comp, task_group_context& _context ) : comp(_comp), context(_context) {} - - void operator()( const blocked_range<RandomAccessIterator>& range ) const { - RandomAccessIterator my_end = range.end(); - - int i = 0; - //TODO: consider using std::is_sorted() for each 64 iterations (requires performance measurements) - for( RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i ) { - if( i % 64 == 0 && context.is_group_execution_cancelled() ) break; - - // The k - 1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1 - if( comp(*(k), *(k - 1)) ) { - context.cancel_group_execution(); - break; - } - } - } -}; - -//! Body class used to sort elements in a range that is smaller than the grainsize. -/** @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -struct quick_sort_body { - void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const { - std::sort(range.begin, range.begin + range.size, range.comp); - } -}; - -//! Method to perform parallel_for based quick sort. -/** @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -void do_parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { - parallel_for(quick_sort_range<RandomAccessIterator,Compare>(begin, end - begin, comp), - quick_sort_body<RandomAccessIterator,Compare>(), - auto_partitioner()); -} - -//! Wrapper method to initiate the sort by calling parallel_for. 
-/** @ingroup algorithms */ -template<typename RandomAccessIterator, typename Compare> -void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { - task_group_context my_context(PARALLEL_SORT); - constexpr int serial_cutoff = 9; - - __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" ); - RandomAccessIterator k = begin; - for( ; k != begin + serial_cutoff; ++k ) { - if( comp(*(k + 1), *k) ) { - do_parallel_quick_sort(begin, end, comp); - } - } - - // Check is input range already sorted - parallel_for(blocked_range<RandomAccessIterator>(k + 1, end), - quick_sort_pretest_body<RandomAccessIterator, Compare>(comp, my_context), - auto_partitioner(), - my_context); - - if( my_context.is_group_execution_cancelled() ) - do_parallel_quick_sort(begin, end, comp); -} - -/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort - Requirements on the iterator type \c It and its value type \c T for \c parallel_sort: - - - \code void iter_swap( It a, It b ) \endcode Swaps the values of the elements the given - iterators \c a and \c b are pointing to. \c It should be a random access iterator. - - - \code bool Compare::operator()( const T& x, const T& y ) \endcode True if x comes before y; -**/ - -/** \name parallel_sort - See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/ -//@{ - -//! Sorts the data in [begin,end) using the given comparator -/** The compare function object is used for all comparisons between elements during sorting. - The compare object must define a bool operator() function. - @ingroup algorithms **/ -template<typename RandomAccessIterator, typename Compare> -void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { - constexpr int min_parallel_size = 500; - if( end > begin ) { - if( end - begin < min_parallel_size ) { - std::sort(begin, end, comp); - } else { - parallel_quick_sort(begin, end, comp); - } - } -} - -//! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator> -/** @ingroup algorithms **/ -template<typename RandomAccessIterator> -void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) { - parallel_sort(begin, end, std::less<typename std::iterator_traits<RandomAccessIterator>::value_type>()); -} - -//! Sorts the data in rng using the given comparator -/** @ingroup algorithms **/ -template<typename Range, typename Compare> -void parallel_sort( Range& rng, const Compare& comp ) { - parallel_sort(std::begin(rng), std::end(rng), comp); -} - -//! Sorts the data in rng with a default comparator \c std::less<RandomAccessIterator> -/** @ingroup algorithms **/ -template<typename Range> -void parallel_sort( Range& rng ) { - parallel_sort(std::begin(rng), std::end(rng)); -} -//@} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::parallel_sort; -} // namespace v1 -} // namespace tbb - -#endif /*__TBB_parallel_sort_H*/ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_sort_H +#define __TBB_parallel_sort_H + +#include "detail/_namespace_injection.h" +#include "parallel_for.h" +#include "blocked_range.h" +#include "profiling.h" + +#include <algorithm> +#include <iterator> +#include <functional> +#include <cstddef> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Range used in quicksort to split elements into subranges based on a value. +/** The split operation selects a splitter and places all elements less than or equal + to the value in the first range and the remaining elements in the second range. + @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +class quick_sort_range { + std::size_t median_of_three( const RandomAccessIterator& array, std::size_t l, std::size_t m, std::size_t r ) const { + return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp(array[l], array[r]) ? r : l ) ) + : ( comp(array[r], array[m]) ? m : ( comp(array[r], array[l]) ? r : l ) ); + } + + std::size_t pseudo_median_of_nine( const RandomAccessIterator& array, const quick_sort_range& range ) const { + std::size_t offset = range.size / 8u; + return median_of_three(array, + median_of_three(array, 0 , offset, offset * 2), + median_of_three(array, offset * 3, offset * 4, offset * 5), + median_of_three(array, offset * 6, offset * 7, range.size - 1)); + + } + + std::size_t split_range( quick_sort_range& range ) { + RandomAccessIterator array = range.begin; + RandomAccessIterator first_element = range.begin; + std::size_t m = pseudo_median_of_nine(array, range); + if( m != 0 ) std::iter_swap(array, array + m); + + std::size_t i = 0; + std::size_t j = range.size; + // Partition interval [i + 1,j - 1] with key *first_element. + for(;;) { + __TBB_ASSERT( i < j, nullptr ); + // Loop must terminate since array[l] == *first_element. + do { + --j; + __TBB_ASSERT( i <= j, "bad ordering relation?" ); + } while( comp(*first_element, array[j]) ); + do { + __TBB_ASSERT( i <= j, nullptr ); + if( i == j ) goto partition; + ++i; + } while( comp(array[i], *first_element) ); + if( i == j ) goto partition; + std::iter_swap(array + i, array + j); + } +partition: + // Put the partition key were it belongs + std::iter_swap(array + j, first_element); + // array[l..j) is less or equal to key. + // array(j..r) is greater or equal to key. + // array[j] is equal to key + i = j + 1; + std::size_t new_range_size = range.size - i; + range.size = j; + return new_range_size; + } + +public: + quick_sort_range() = default; + quick_sort_range( const quick_sort_range& ) = default; + void operator=( const quick_sort_range& ) = delete; + + static constexpr std::size_t grainsize = 500; + const Compare& comp; + std::size_t size; + RandomAccessIterator begin; + + quick_sort_range( RandomAccessIterator begin_, std::size_t size_, const Compare& comp_ ) : + comp(comp_), size(size_), begin(begin_) {} + + bool empty() const { return size == 0; } + bool is_divisible() const { return size >= grainsize; } + + quick_sort_range( quick_sort_range& range, split ) + : comp(range.comp) + , size(split_range(range)) + // +1 accounts for the pivot element, which is at its correct place + // already and, therefore, is not included into subranges. + , begin(range.begin + range.size + 1) {} +}; + +//! 
Body class used to test if elements in a range are presorted +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +class quick_sort_pretest_body { + const Compare& comp; + task_group_context& context; + +public: + quick_sort_pretest_body() = default; + quick_sort_pretest_body( const quick_sort_pretest_body& ) = default; + void operator=( const quick_sort_pretest_body& ) = delete; + + quick_sort_pretest_body( const Compare& _comp, task_group_context& _context ) : comp(_comp), context(_context) {} + + void operator()( const blocked_range<RandomAccessIterator>& range ) const { + RandomAccessIterator my_end = range.end(); + + int i = 0; + //TODO: consider using std::is_sorted() for each 64 iterations (requires performance measurements) + for( RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i ) { + if( i % 64 == 0 && context.is_group_execution_cancelled() ) break; + + // The k - 1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1 + if( comp(*(k), *(k - 1)) ) { + context.cancel_group_execution(); + break; + } + } + } +}; + +//! Body class used to sort elements in a range that is smaller than the grainsize. +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +struct quick_sort_body { + void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const { + std::sort(range.begin, range.begin + range.size, range.comp); + } +}; + +//! Method to perform parallel_for based quick sort. +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +void do_parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + parallel_for(quick_sort_range<RandomAccessIterator,Compare>(begin, end - begin, comp), + quick_sort_body<RandomAccessIterator,Compare>(), + auto_partitioner()); +} + +//! Wrapper method to initiate the sort by calling parallel_for. +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + task_group_context my_context(PARALLEL_SORT); + constexpr int serial_cutoff = 9; + + __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" ); + RandomAccessIterator k = begin; + for( ; k != begin + serial_cutoff; ++k ) { + if( comp(*(k + 1), *k) ) { + do_parallel_quick_sort(begin, end, comp); + } + } + + // Check is input range already sorted + parallel_for(blocked_range<RandomAccessIterator>(k + 1, end), + quick_sort_pretest_body<RandomAccessIterator, Compare>(comp, my_context), + auto_partitioner(), + my_context); + + if( my_context.is_group_execution_cancelled() ) + do_parallel_quick_sort(begin, end, comp); +} + +/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort + Requirements on the iterator type \c It and its value type \c T for \c parallel_sort: + + - \code void iter_swap( It a, It b ) \endcode Swaps the values of the elements the given + iterators \c a and \c b are pointing to. \c It should be a random access iterator. + + - \code bool Compare::operator()( const T& x, const T& y ) \endcode True if x comes before y; +**/ + +/** \name parallel_sort + See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/ +//@{ + +//! Sorts the data in [begin,end) using the given comparator +/** The compare function object is used for all comparisons between elements during sorting. 
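+    Illustrative sketch only (editorial addition; the vector \c v is hypothetical and
+    assumes the usual standard-library includes):
+    \code
+    std::vector<int> v{3, 1, 4, 1, 5};
+    tbb::parallel_sort(v.begin(), v.end(), [](int x, int y) { return x > y; }); // descending order
+    \endcode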
+ The compare object must define a bool operator() function. + @ingroup algorithms **/ +template<typename RandomAccessIterator, typename Compare> +void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + constexpr int min_parallel_size = 500; + if( end > begin ) { + if( end - begin < min_parallel_size ) { + std::sort(begin, end, comp); + } else { + parallel_quick_sort(begin, end, comp); + } + } +} + +//! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator> +/** @ingroup algorithms **/ +template<typename RandomAccessIterator> +void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) { + parallel_sort(begin, end, std::less<typename std::iterator_traits<RandomAccessIterator>::value_type>()); +} + +//! Sorts the data in rng using the given comparator +/** @ingroup algorithms **/ +template<typename Range, typename Compare> +void parallel_sort( Range& rng, const Compare& comp ) { + parallel_sort(std::begin(rng), std::end(rng), comp); +} + +//! Sorts the data in rng with a default comparator \c std::less<RandomAccessIterator> +/** @ingroup algorithms **/ +template<typename Range> +void parallel_sort( Range& rng ) { + parallel_sort(std::begin(rng), std::end(rng)); +} +//@} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::parallel_sort; +} // namespace v1 +} // namespace tbb + +#endif /*__TBB_parallel_sort_H*/ diff --git a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h index 37ac0a09d9..bd1dc377d0 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h +++ b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h @@ -1,688 +1,688 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_partitioner_H -#define __TBB_partitioner_H - -#ifndef __TBB_INITIAL_CHUNKS -// initial task divisions per thread -#define __TBB_INITIAL_CHUNKS 2 -#endif -#ifndef __TBB_RANGE_POOL_CAPACITY -// maximum number of elements in range pool -#define __TBB_RANGE_POOL_CAPACITY 8 -#endif -#ifndef __TBB_INIT_DEPTH -// initial value for depth of range pool -#define __TBB_INIT_DEPTH 5 -#endif -#ifndef __TBB_DEMAND_DEPTH_ADD -// when imbalance is found range splits this value times more -#define __TBB_DEMAND_DEPTH_ADD 1 -#endif - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_aligned_space.h" -#include "detail/_utils.h" -#include "detail/_template_helpers.h" -#include "detail/_range_common.h" -#include "detail/_task.h" -#include "detail/_small_object_pool.h" - -#include "cache_aligned_allocator.h" -#include "task_group.h" // task_group_context -#include "task_arena.h" - -#include <algorithm> -#include <atomic> -#include <type_traits> - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - // Workaround for overzealous compiler warnings - #pragma warning (push) - #pragma warning (disable: 4244) -#endif - -namespace tbb { -namespace detail { - -namespace d1 { -class auto_partitioner; -class simple_partitioner; -class static_partitioner; -class affinity_partitioner; -class affinity_partition_type; -class affinity_partitioner_base; - -inline std::size_t get_initial_auto_partitioner_divisor() { - const std::size_t factor = 4; - return factor * max_concurrency(); -} - -//! Defines entry point for affinity partitioner into oneTBB run-time library. -class affinity_partitioner_base: no_copy { - friend class affinity_partitioner; - friend class affinity_partition_type; - //! Array that remembers affinities of tree positions to affinity_id. - /** NULL if my_size==0. */ - slot_id* my_array; - //! Number of elements in my_array. - std::size_t my_size; - //! Zeros the fields. - affinity_partitioner_base() : my_array(nullptr), my_size(0) {} - //! Deallocates my_array. - ~affinity_partitioner_base() { resize(0); } - //! Resize my_array. - /** Retains values if resulting size is the same. */ - void resize(unsigned factor) { - // Check factor to avoid asking for number of workers while there might be no arena. - unsigned max_threads_in_arena = max_concurrency(); - std::size_t new_size = factor ? factor * max_threads_in_arena : 0; - if (new_size != my_size) { - if (my_array) { - r1::cache_aligned_deallocate(my_array); - // Following two assignments must be done here for sake of exception safety. - my_array = nullptr; - my_size = 0; - } - if (new_size) { - my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id))); - std::fill_n(my_array, new_size, no_slot); - my_size = new_size; - } - } - } -}; - -template<typename Range, typename Body, typename Partitioner> struct start_for; -template<typename Range, typename Body, typename Partitioner> struct start_scan; -template<typename Range, typename Body, typename Partitioner> struct start_reduce; -template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce; - -struct node { - node* my_parent{}; - std::atomic<int> m_ref_count{}; - - node() = default; - node(node* parent, int ref_count) : - my_parent{parent}, m_ref_count{ref_count} { - __TBB_ASSERT(ref_count > 0, "The ref count must be positive"); - } -}; - -struct wait_node : node { - wait_node() : node{ nullptr, 1 } {} - wait_context m_wait{1}; -}; - -//! 
Join task node that contains shared flag for stealing feedback -struct tree_node : public node { - small_object_allocator m_allocator; - std::atomic<bool> m_child_stolen{false}; - - tree_node(node* parent, int ref_count, small_object_allocator& alloc) - : node{parent, ref_count} - , m_allocator{alloc} {} - - void join(task_group_context*) {/*dummy, required only for reduction algorithms*/}; - - template <typename Task> - static void mark_task_stolen(Task &t) { - std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen; -#if TBB_USE_PROFILING_TOOLS - // Threading tools respect lock prefix but report false-positive data-race via plain store - flag.exchange(true); -#else - flag.store(true, std::memory_order_relaxed); -#endif // TBB_USE_PROFILING_TOOLS - } - template <typename Task> - static bool is_peer_stolen(Task &t) { - return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed); - } -}; - -// Context used to check cancellation state during reduction join process -template<typename TreeNodeType> -void fold_tree(node* n, const execution_data& ed) { - for (;;) { - __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive."); - call_itt_task_notify(releasing, n); - if (--n->m_ref_count > 0) { - return; - } - node* parent = n->my_parent; - if (!parent) { - break; - }; - - call_itt_task_notify(acquired, n); - TreeNodeType* self = static_cast<TreeNodeType*>(n); - self->join(ed.context); - self->m_allocator.delete_object(self, ed); - n = parent; - } - // Finish parallel for execution when the root (last node) is reached - static_cast<wait_node*>(n)->m_wait.release(); -} - -//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows -//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented -//! by a number that cannot fit into machine word. -typedef unsigned char depth_t; - -//! Range pool stores ranges of type T in a circular buffer with MaxCapacity -template <typename T, depth_t MaxCapacity> -class range_vector { - depth_t my_head; - depth_t my_tail; - depth_t my_size; - depth_t my_depth[MaxCapacity]; // relative depths of stored ranges - tbb::detail::aligned_space<T, MaxCapacity> my_pool; - -public: - //! initialize via first range in pool - range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) { - my_depth[0] = 0; - new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move? - } - ~range_vector() { - while( !empty() ) pop_back(); - } - bool empty() const { return my_size == 0; } - depth_t size() const { return my_size; } - //! Populates range pool via ranges up to max depth or while divisible - //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces - void split_to_fill(depth_t max_depth) { - while( my_size < MaxCapacity && is_divisible(max_depth) ) { - depth_t prev = my_head; - my_head = (my_head + 1) % MaxCapacity; - new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move? 
- my_pool.begin()[prev].~T(); // instead of assignment - new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split - my_depth[my_head] = ++my_depth[prev]; - my_size++; - } - } - void pop_back() { - __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size"); - my_pool.begin()[my_head].~T(); - my_size--; - my_head = (my_head + MaxCapacity - 1) % MaxCapacity; - } - void pop_front() { - __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size"); - my_pool.begin()[my_tail].~T(); - my_size--; - my_tail = (my_tail + 1) % MaxCapacity; - } - T& back() { - __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size"); - return my_pool.begin()[my_head]; - } - T& front() { - __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size"); - return my_pool.begin()[my_tail]; - } - //! similarly to front(), returns depth of the first range in the pool - depth_t front_depth() { - __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size"); - return my_depth[my_tail]; - } - depth_t back_depth() { - __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size"); - return my_depth[my_head]; - } - bool is_divisible(depth_t max_depth) { - return back_depth() < max_depth && back().is_divisible(); - } -}; - -//! Provides default methods for partition objects and common algorithm blocks. -template <typename Partition> -struct partition_type_base { - typedef detail::split split_type; - // decision makers - void note_affinity( slot_id ) {} - template <typename Task> - bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range() - template <typename Range> split_type get_split() { return split(); } - Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper - - template<typename StartType, typename Range> - void work_balance(StartType &start, Range &range, const execution_data&) { - start.run_body( range ); // simple partitioner goes always here - } - - template<typename StartType, typename Range> - void execute(StartType &start, Range &range, execution_data& ed) { - // The algorithm in a few words ([]-denotes calls to decision methods of partitioner): - // [If this task is stolen, adjust depth and divisions if necessary, set flag]. - // If range is divisible { - // Spread the work while [initial divisions left]; - // Create trap task [if necessary]; - // } - // If not divisible or [max depth is reached], execute, else do the range pool part - if ( range.is_divisible() ) { - if ( self().is_divisible() ) { - do { // split until is divisible - typename Partition::split_type split_obj = self().template get_split<Range>(); - start.offer_work( split_obj, ed ); - } while ( range.is_divisible() && self().is_divisible() ); - } - } - self().work_balance(start, range, ed); - } -}; - -//! Provides default splitting strategy for partition objects. -template <typename Partition> -struct adaptive_mode : partition_type_base<Partition> { - typedef Partition my_partition; - std::size_t my_divisor; - // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves. - // A task which has only one index must produce the right split without reserved index in order to avoid - // it to be overwritten in note_affinity() of the created (right) task. - // I.e. 
a task created deeper than the affinity array can remember must not save its affinity (LIFO order) - static const unsigned factor = 1; - adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {} - adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {} - /*! Override do_split methods in order to specify splitting strategy */ - std::size_t do_split(adaptive_mode &src, split) { - return src.my_divisor /= 2u; - } -}; - -//! Helper type for checking availability of proportional_split constructor -template <typename T> using supports_proportional_splitting = typename std::is_constructible<T, T&, proportional_split&>; - -//! A helper class to create a proportional_split object for a given type of Range. -/** If the Range has proportional_split constructor, - then created object splits a provided value in an implemenation-defined proportion; - otherwise it represents equal-size split. */ -// TODO: check if this helper can be a nested class of proportional_mode. -template <typename Range, typename = void> -struct proportion_helper { - static proportional_split get_split(std::size_t) { return proportional_split(1,1); } -}; - -template <typename Range> -struct proportion_helper<Range, typename std::enable_if<supports_proportional_splitting<Range>::value>::type> { - static proportional_split get_split(std::size_t n) { - std::size_t right = n / 2; - std::size_t left = n - right; - return proportional_split(left, right); - } -}; - -//! Provides proportional splitting strategy for partition objects -template <typename Partition> -struct proportional_mode : adaptive_mode<Partition> { - typedef Partition my_partition; - using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes - - proportional_mode() : adaptive_mode<Partition>() {} - proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {} - proportional_mode(proportional_mode &src, const proportional_split& split_obj) { self().my_divisor = do_split(src, split_obj); } - std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) { - std::size_t portion = split_obj.right() * my_partition::factor; - portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor); - src.my_divisor -= portion; - return portion; - } - bool is_divisible() { // part of old should_execute_range() - return self().my_divisor > my_partition::factor; - } - template <typename Range> - proportional_split get_split() { - // Create a proportion for the number of threads expected to handle "this" subrange - return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor ); - } -}; - -static std::size_t get_initial_partition_head() { - int current_index = tbb::this_task_arena::current_thread_index(); - if (current_index == tbb::task_arena::not_initialized) - current_index = 0; - return size_t(current_index); -} - -//! 
Provides default linear indexing of partitioner's sequence -template <typename Partition> -struct linear_affinity_mode : proportional_mode<Partition> { - std::size_t my_head; - std::size_t my_max_affinity; - using proportional_mode<Partition>::self; - linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()), - my_max_affinity(self().my_divisor) {} - linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split()) - , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} - linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj) - , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} - void spawn_task(task& t, task_group_context& ctx) { - if (self().my_divisor) { - spawn(t, ctx, slot_id(my_head)); - } else { - spawn(t, ctx); - } - } -}; - -static bool is_stolen_task(const execution_data& ed) { - return execution_slot(ed) != original_slot(ed); -} - -/*! Determine work-balance phase implementing splitting & stealing actions */ -template<class Mode> -struct dynamic_grainsize_mode : Mode { - using Mode::self; - enum { - begin = 0, - run, - pass - } my_delay; - depth_t my_max_depth; - static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; - dynamic_grainsize_mode(): Mode() - , my_delay(begin) - , my_max_depth(__TBB_INIT_DEPTH) {} - dynamic_grainsize_mode(dynamic_grainsize_mode& p, split) - : Mode(p, split()) - , my_delay(pass) - , my_max_depth(p.my_max_depth) {} - dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj) - : Mode(p, split_obj) - , my_delay(begin) - , my_max_depth(p.my_max_depth) {} - template <typename Task> - bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range() - if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree - self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)? - if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task -#if __TBB_USE_OPTIONAL_RTTI - // RTTI is available, check whether the cast is valid - // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast<tree_node*>(t.m_parent), 0); - // correctness of the cast relies on avoiding the root task for which: - // - initial value of my_divisor != 0 (protected by separate assertion) - // - is_stolen_task() always returns false for the root task. 
-#endif - tree_node::mark_task_stolen(t); - if( !my_max_depth ) my_max_depth++; - my_max_depth += __TBB_DEMAND_DEPTH_ADD; - return true; - } - } - return false; - } - depth_t max_depth() { return my_max_depth; } - void align_depth(depth_t base) { - __TBB_ASSERT(base <= my_max_depth, 0); - my_max_depth -= base; - } - template<typename StartType, typename Range> - void work_balance(StartType &start, Range &range, execution_data& ed) { - if( !range.is_divisible() || !self().max_depth() ) { - start.run_body( range ); // simple partitioner goes always here - } - else { // do range pool - range_vector<Range, range_pool_size> range_pool(range); - do { - range_pool.split_to_fill(self().max_depth()); // fill range pool - if( self().check_for_demand( start ) ) { - if( range_pool.size() > 1 ) { - start.offer_work( range_pool.front(), range_pool.front_depth(), ed ); - range_pool.pop_front(); - continue; - } - if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task - continue; // note: next split_to_fill() should split range at least once - } - start.run_body( range_pool.back() ); - range_pool.pop_back(); - } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() ); - } - } - template <typename Task> - bool check_for_demand(Task& t) { - if ( pass == my_delay ) { - if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array - return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more - else if ( self().my_divisor && my_max_depth ) { // make balancing task - self().my_divisor = 0; // once for each task; depth will be decreased in align_depth() - return true; - } - else if ( tree_node::is_peer_stolen(t) ) { - my_max_depth += __TBB_DEMAND_DEPTH_ADD; - return true; - } - } else if( begin == my_delay ) { - my_delay = pass; - } - return false; - } -}; - -class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > { -public: - auto_partition_type( const auto_partitioner& ) - : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() { - my_divisor *= __TBB_INITIAL_CHUNKS; - } - auto_partition_type( auto_partition_type& src, split) - : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {} - bool is_divisible() { // part of old should_execute_range() - if( my_divisor > 1 ) return true; - if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead - // keep same fragmentation while splitting for the local task pool - my_max_depth--; - my_divisor = 0; // decrease max_depth once per task - return true; - } else return false; - } - template <typename Task> - bool check_for_demand(Task& t) { - if (tree_node::is_peer_stolen(t)) { - my_max_depth += __TBB_DEMAND_DEPTH_ADD; - return true; - } else return false; - } - void spawn_task(task& t, task_group_context& ctx) { - spawn(t, ctx); - } -}; - -class simple_partition_type: public partition_type_base<simple_partition_type> { -public: - simple_partition_type( const simple_partitioner& ) {} - simple_partition_type( const simple_partition_type&, split ) {} - //! 
simplified algorithm - template<typename StartType, typename Range> - void execute(StartType &start, Range &range, execution_data& ed) { - split_type split_obj = split(); // start.offer_work accepts split_type as reference - while( range.is_divisible() ) - start.offer_work( split_obj, ed ); - start.run_body( range ); - } - void spawn_task(task& t, task_group_context& ctx) { - spawn(t, ctx); - } -}; - -class static_partition_type : public linear_affinity_mode<static_partition_type> { -public: - typedef detail::proportional_split split_type; - static_partition_type( const static_partitioner& ) - : linear_affinity_mode<static_partition_type>() {} - static_partition_type( static_partition_type& p, const proportional_split& split_obj ) - : linear_affinity_mode<static_partition_type>(p, split_obj) {} -}; - -class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > { - static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units - slot_id* my_array; -public: - static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task - typedef detail::proportional_split split_type; - affinity_partition_type( affinity_partitioner_base& ap ) - : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() { - __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); - ap.resize(factor); - my_array = ap.my_array; - my_max_depth = factor_power + 1; - __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); - } - affinity_partition_type(affinity_partition_type& p, split) - : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split()) - , my_array(p.my_array) {} - affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj) - : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj) - , my_array(p.my_array) {} - void note_affinity(slot_id id) { - if( my_divisor ) - my_array[my_head] = id; - } - void spawn_task(task& t, task_group_context& ctx) { - if (my_divisor) { - if (!my_array[my_head]) { - // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse - spawn(t, ctx, slot_id(my_head / factor)); - } else { - spawn(t, ctx, my_array[my_head]); - } - } else { - spawn(t, ctx); - } - } -}; - -//! A simple partitioner -/** Divides the range until the range is not divisible. - @ingroup algorithms */ -class simple_partitioner { -public: - simple_partitioner() {} -private: - template<typename Range, typename Body, typename Partitioner> friend struct start_for; - template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_scan; - // new implementation just extends existing interface - typedef simple_partition_type task_partition_type; - // TODO: consider to make split_type public - typedef simple_partition_type::split_type split_type; - - // for parallel_scan only - class partition_type { - public: - bool should_execute_range(const execution_data& ) {return false;} - partition_type( const simple_partitioner& ) {} - partition_type( const partition_type&, split ) {} - }; -}; - -//! An auto partitioner -/** The range is initial divided into several large chunks. 
- Chunks are further subdivided into smaller pieces if demand detected and they are divisible. - @ingroup algorithms */ -class auto_partitioner { -public: - auto_partitioner() {} - -private: - template<typename Range, typename Body, typename Partitioner> friend struct start_for; - template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_scan; - // new implementation just extends existing interface - typedef auto_partition_type task_partition_type; - // TODO: consider to make split_type public - typedef auto_partition_type::split_type split_type; - - //! Backward-compatible partition for auto and affinity partition objects. - class partition_type { - size_t num_chunks; - static const size_t VICTIM_CHUNKS = 4; - public: - bool should_execute_range(const execution_data& ed) { - if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) ) - num_chunks = VICTIM_CHUNKS; - return num_chunks==1; - } - partition_type( const auto_partitioner& ) - : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} - partition_type( partition_type& pt, split ) { - num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u; - } - }; -}; - -//! A static partitioner -class static_partitioner { -public: - static_partitioner() {} -private: - template<typename Range, typename Body, typename Partitioner> friend struct start_for; - template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_scan; - // new implementation just extends existing interface - typedef static_partition_type task_partition_type; - // TODO: consider to make split_type public - typedef static_partition_type::split_type split_type; -}; - -//! 
An affinity partitioner -class affinity_partitioner : affinity_partitioner_base { -public: - affinity_partitioner() {} - -private: - template<typename Range, typename Body, typename Partitioner> friend struct start_for; - template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; - template<typename Range, typename Body, typename Partitioner> friend struct start_scan; - // new implementation just extends existing interface - typedef affinity_partition_type task_partition_type; - // TODO: consider to make split_type public - typedef affinity_partition_type::split_type split_type; -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -// Partitioners -using detail::d1::auto_partitioner; -using detail::d1::simple_partitioner; -using detail::d1::static_partitioner; -using detail::d1::affinity_partitioner; -// Split types -using detail::split; -using detail::proportional_split; -} // namespace v1 - -} // namespace tbb - -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) - #pragma warning (pop) -#endif // warning 4244 is back - -#undef __TBB_INITIAL_CHUNKS -#undef __TBB_RANGE_POOL_CAPACITY -#undef __TBB_INIT_DEPTH - -#endif /* __TBB_partitioner_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_partitioner_H +#define __TBB_partitioner_H + +#ifndef __TBB_INITIAL_CHUNKS +// initial task divisions per thread +#define __TBB_INITIAL_CHUNKS 2 +#endif +#ifndef __TBB_RANGE_POOL_CAPACITY +// maximum number of elements in range pool +#define __TBB_RANGE_POOL_CAPACITY 8 +#endif +#ifndef __TBB_INIT_DEPTH +// initial value for depth of range pool +#define __TBB_INIT_DEPTH 5 +#endif +#ifndef __TBB_DEMAND_DEPTH_ADD +// when imbalance is found range splits this value times more +#define __TBB_DEMAND_DEPTH_ADD 1 +#endif + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_aligned_space.h" +#include "detail/_utils.h" +#include "detail/_template_helpers.h" +#include "detail/_range_common.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" + +#include "cache_aligned_allocator.h" +#include "task_group.h" // task_group_context +#include "task_arena.h" + +#include <algorithm> +#include <atomic> +#include <type_traits> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Workaround for overzealous compiler warnings + #pragma warning (push) + #pragma warning (disable: 4244) +#endif + +namespace tbb { +namespace detail { + +namespace d1 { +class auto_partitioner; +class simple_partitioner; +class static_partitioner; +class affinity_partitioner; +class affinity_partition_type; +class affinity_partitioner_base; + +inline std::size_t get_initial_auto_partitioner_divisor() { + const std::size_t factor = 4; + return factor * max_concurrency(); +} + +//! Defines entry point for affinity partitioner into oneTBB run-time library. 
+class affinity_partitioner_base: no_copy { + friend class affinity_partitioner; + friend class affinity_partition_type; + //! Array that remembers affinities of tree positions to affinity_id. + /** NULL if my_size==0. */ + slot_id* my_array; + //! Number of elements in my_array. + std::size_t my_size; + //! Zeros the fields. + affinity_partitioner_base() : my_array(nullptr), my_size(0) {} + //! Deallocates my_array. + ~affinity_partitioner_base() { resize(0); } + //! Resize my_array. + /** Retains values if resulting size is the same. */ + void resize(unsigned factor) { + // Check factor to avoid asking for number of workers while there might be no arena. + unsigned max_threads_in_arena = max_concurrency(); + std::size_t new_size = factor ? factor * max_threads_in_arena : 0; + if (new_size != my_size) { + if (my_array) { + r1::cache_aligned_deallocate(my_array); + // Following two assignments must be done here for sake of exception safety. + my_array = nullptr; + my_size = 0; + } + if (new_size) { + my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id))); + std::fill_n(my_array, new_size, no_slot); + my_size = new_size; + } + } + } +}; + +template<typename Range, typename Body, typename Partitioner> struct start_for; +template<typename Range, typename Body, typename Partitioner> struct start_scan; +template<typename Range, typename Body, typename Partitioner> struct start_reduce; +template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce; + +struct node { + node* my_parent{}; + std::atomic<int> m_ref_count{}; + + node() = default; + node(node* parent, int ref_count) : + my_parent{parent}, m_ref_count{ref_count} { + __TBB_ASSERT(ref_count > 0, "The ref count must be positive"); + } +}; + +struct wait_node : node { + wait_node() : node{ nullptr, 1 } {} + wait_context m_wait{1}; +}; + +//! 
Join task node that contains shared flag for stealing feedback +struct tree_node : public node { + small_object_allocator m_allocator; + std::atomic<bool> m_child_stolen{false}; + + tree_node(node* parent, int ref_count, small_object_allocator& alloc) + : node{parent, ref_count} + , m_allocator{alloc} {} + + void join(task_group_context*) {/*dummy, required only for reduction algorithms*/}; + + template <typename Task> + static void mark_task_stolen(Task &t) { + std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen; +#if TBB_USE_PROFILING_TOOLS + // Threading tools respect lock prefix but report false-positive data-race via plain store + flag.exchange(true); +#else + flag.store(true, std::memory_order_relaxed); +#endif // TBB_USE_PROFILING_TOOLS + } + template <typename Task> + static bool is_peer_stolen(Task &t) { + return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed); + } +}; + +// Context used to check cancellation state during reduction join process +template<typename TreeNodeType> +void fold_tree(node* n, const execution_data& ed) { + for (;;) { + __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive."); + call_itt_task_notify(releasing, n); + if (--n->m_ref_count > 0) { + return; + } + node* parent = n->my_parent; + if (!parent) { + break; + }; + + call_itt_task_notify(acquired, n); + TreeNodeType* self = static_cast<TreeNodeType*>(n); + self->join(ed.context); + self->m_allocator.delete_object(self, ed); + n = parent; + } + // Finish parallel for execution when the root (last node) is reached + static_cast<wait_node*>(n)->m_wait.release(); +} + +//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows +//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented +//! by a number that cannot fit into machine word. +typedef unsigned char depth_t; + +//! Range pool stores ranges of type T in a circular buffer with MaxCapacity +template <typename T, depth_t MaxCapacity> +class range_vector { + depth_t my_head; + depth_t my_tail; + depth_t my_size; + depth_t my_depth[MaxCapacity]; // relative depths of stored ranges + tbb::detail::aligned_space<T, MaxCapacity> my_pool; + +public: + //! initialize via first range in pool + range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) { + my_depth[0] = 0; + new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move? + } + ~range_vector() { + while( !empty() ) pop_back(); + } + bool empty() const { return my_size == 0; } + depth_t size() const { return my_size; } + //! Populates range pool via ranges up to max depth or while divisible + //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces + void split_to_fill(depth_t max_depth) { + while( my_size < MaxCapacity && is_divisible(max_depth) ) { + depth_t prev = my_head; + my_head = (my_head + 1) % MaxCapacity; + new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move? 
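fold_tree above is the join side of these algorithms: each finishing child drops one reference on its parent node, and only the last child to finish continues up the tree, with the root finally releasing the wait_context. A minimal standalone sketch of that reference-counting pattern, using hypothetical demo_node/demo_fold names rather than the library's types:

    #include <atomic>

    struct demo_node {
        demo_node* parent{nullptr};
        std::atomic<int> ref_count{0};   // one reference per outstanding child
    };

    // Called by each finishing child: only the thread that drops the last
    // reference walks up to the parent; the root signals overall completion.
    inline void demo_fold(demo_node* n, std::atomic<bool>& done) {
        for (;;) {
            if (n->ref_count.fetch_sub(1, std::memory_order_acq_rel) > 1)
                return;                  // siblings are still running
            demo_node* parent = n->parent;
            if (!parent) break;          // reached the root
            n = parent;                  // per-level join work would happen here
        }
        done.store(true, std::memory_order_release);
    }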
+ my_pool.begin()[prev].~T(); // instead of assignment + new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split + my_depth[my_head] = ++my_depth[prev]; + my_size++; + } + } + void pop_back() { + __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size"); + my_pool.begin()[my_head].~T(); + my_size--; + my_head = (my_head + MaxCapacity - 1) % MaxCapacity; + } + void pop_front() { + __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size"); + my_pool.begin()[my_tail].~T(); + my_size--; + my_tail = (my_tail + 1) % MaxCapacity; + } + T& back() { + __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size"); + return my_pool.begin()[my_head]; + } + T& front() { + __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size"); + return my_pool.begin()[my_tail]; + } + //! similarly to front(), returns depth of the first range in the pool + depth_t front_depth() { + __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size"); + return my_depth[my_tail]; + } + depth_t back_depth() { + __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size"); + return my_depth[my_head]; + } + bool is_divisible(depth_t max_depth) { + return back_depth() < max_depth && back().is_divisible(); + } +}; + +//! Provides default methods for partition objects and common algorithm blocks. +template <typename Partition> +struct partition_type_base { + typedef detail::split split_type; + // decision makers + void note_affinity( slot_id ) {} + template <typename Task> + bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range() + template <typename Range> split_type get_split() { return split(); } + Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper + + template<typename StartType, typename Range> + void work_balance(StartType &start, Range &range, const execution_data&) { + start.run_body( range ); // simple partitioner goes always here + } + + template<typename StartType, typename Range> + void execute(StartType &start, Range &range, execution_data& ed) { + // The algorithm in a few words ([]-denotes calls to decision methods of partitioner): + // [If this task is stolen, adjust depth and divisions if necessary, set flag]. + // If range is divisible { + // Spread the work while [initial divisions left]; + // Create trap task [if necessary]; + // } + // If not divisible or [max depth is reached], execute, else do the range pool part + if ( range.is_divisible() ) { + if ( self().is_divisible() ) { + do { // split until is divisible + typename Partition::split_type split_obj = self().template get_split<Range>(); + start.offer_work( split_obj, ed ); + } while ( range.is_divisible() && self().is_divisible() ); + } + } + self().work_balance(start, range, ed); + } +}; + +//! Provides default splitting strategy for partition objects. +template <typename Partition> +struct adaptive_mode : partition_type_base<Partition> { + typedef Partition my_partition; + std::size_t my_divisor; + // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves. + // A task which has only one index must produce the right split without reserved index in order to avoid + // it to be overwritten in note_affinity() of the created (right) task. + // I.e. 
a task created deeper than the affinity array can remember must not save its affinity (LIFO order) + static const unsigned factor = 1; + adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {} + adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {} + /*! Override do_split methods in order to specify splitting strategy */ + std::size_t do_split(adaptive_mode &src, split) { + return src.my_divisor /= 2u; + } +}; + +//! Helper type for checking availability of proportional_split constructor +template <typename T> using supports_proportional_splitting = typename std::is_constructible<T, T&, proportional_split&>; + +//! A helper class to create a proportional_split object for a given type of Range. +/** If the Range has proportional_split constructor, + then created object splits a provided value in an implemenation-defined proportion; + otherwise it represents equal-size split. */ +// TODO: check if this helper can be a nested class of proportional_mode. +template <typename Range, typename = void> +struct proportion_helper { + static proportional_split get_split(std::size_t) { return proportional_split(1,1); } +}; + +template <typename Range> +struct proportion_helper<Range, typename std::enable_if<supports_proportional_splitting<Range>::value>::type> { + static proportional_split get_split(std::size_t n) { + std::size_t right = n / 2; + std::size_t left = n - right; + return proportional_split(left, right); + } +}; + +//! Provides proportional splitting strategy for partition objects +template <typename Partition> +struct proportional_mode : adaptive_mode<Partition> { + typedef Partition my_partition; + using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes + + proportional_mode() : adaptive_mode<Partition>() {} + proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {} + proportional_mode(proportional_mode &src, const proportional_split& split_obj) { self().my_divisor = do_split(src, split_obj); } + std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) { + std::size_t portion = split_obj.right() * my_partition::factor; + portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor); + src.my_divisor -= portion; + return portion; + } + bool is_divisible() { // part of old should_execute_range() + return self().my_divisor > my_partition::factor; + } + template <typename Range> + proportional_split get_split() { + // Create a proportion for the number of threads expected to handle "this" subrange + return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor ); + } +}; + +static std::size_t get_initial_partition_head() { + int current_index = tbb::this_task_arena::current_thread_index(); + if (current_index == tbb::task_arena::not_initialized) + current_index = 0; + return size_t(current_index); +} + +//! 
Provides default linear indexing of partitioner's sequence +template <typename Partition> +struct linear_affinity_mode : proportional_mode<Partition> { + std::size_t my_head; + std::size_t my_max_affinity; + using proportional_mode<Partition>::self; + linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()), + my_max_affinity(self().my_divisor) {} + linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split()) + , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} + linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj) + , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} + void spawn_task(task& t, task_group_context& ctx) { + if (self().my_divisor) { + spawn(t, ctx, slot_id(my_head)); + } else { + spawn(t, ctx); + } + } +}; + +static bool is_stolen_task(const execution_data& ed) { + return execution_slot(ed) != original_slot(ed); +} + +/*! Determine work-balance phase implementing splitting & stealing actions */ +template<class Mode> +struct dynamic_grainsize_mode : Mode { + using Mode::self; + enum { + begin = 0, + run, + pass + } my_delay; + depth_t my_max_depth; + static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; + dynamic_grainsize_mode(): Mode() + , my_delay(begin) + , my_max_depth(__TBB_INIT_DEPTH) {} + dynamic_grainsize_mode(dynamic_grainsize_mode& p, split) + : Mode(p, split()) + , my_delay(pass) + , my_max_depth(p.my_max_depth) {} + dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj) + : Mode(p, split_obj) + , my_delay(begin) + , my_max_depth(p.my_max_depth) {} + template <typename Task> + bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range() + if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree + self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)? + if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task +#if __TBB_USE_OPTIONAL_RTTI + // RTTI is available, check whether the cast is valid + // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast<tree_node*>(t.m_parent), 0); + // correctness of the cast relies on avoiding the root task for which: + // - initial value of my_divisor != 0 (protected by separate assertion) + // - is_stolen_task() always returns false for the root task. 
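linear_affinity_mode above gives each subrange a starting slot: an even split halves my_divisor and places the new part my_divisor slots further along, modulo my_max_affinity. A rough standalone model of that arithmetic (demo-only function, even binary splits, the factor multiplier and the proportional path ignored):

    #include <cstdio>
    #include <cstddef>

    // Prints the slot each leaf subrange would be spawned to, mimicking the
    // (head, divisor) halving performed on every split.
    void demo_affinity_split(std::size_t head, std::size_t divisor, std::size_t max_affinity) {
        if (divisor <= 1) {
            std::printf("leaf spawned to slot %zu\n", head);
            return;
        }
        std::size_t half = divisor / 2;
        demo_affinity_split(head, half, max_affinity);                         // left part keeps the head
        demo_affinity_split((head + half) % max_affinity, half, max_affinity); // right part is offset
    }

    // demo_affinity_split(0, 8, 8) prints each of the slots 0..7 exactly once.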
+#endif + tree_node::mark_task_stolen(t); + if( !my_max_depth ) my_max_depth++; + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } + } + return false; + } + depth_t max_depth() { return my_max_depth; } + void align_depth(depth_t base) { + __TBB_ASSERT(base <= my_max_depth, 0); + my_max_depth -= base; + } + template<typename StartType, typename Range> + void work_balance(StartType &start, Range &range, execution_data& ed) { + if( !range.is_divisible() || !self().max_depth() ) { + start.run_body( range ); // simple partitioner goes always here + } + else { // do range pool + range_vector<Range, range_pool_size> range_pool(range); + do { + range_pool.split_to_fill(self().max_depth()); // fill range pool + if( self().check_for_demand( start ) ) { + if( range_pool.size() > 1 ) { + start.offer_work( range_pool.front(), range_pool.front_depth(), ed ); + range_pool.pop_front(); + continue; + } + if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task + continue; // note: next split_to_fill() should split range at least once + } + start.run_body( range_pool.back() ); + range_pool.pop_back(); + } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() ); + } + } + template <typename Task> + bool check_for_demand(Task& t) { + if ( pass == my_delay ) { + if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array + return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more + else if ( self().my_divisor && my_max_depth ) { // make balancing task + self().my_divisor = 0; // once for each task; depth will be decreased in align_depth() + return true; + } + else if ( tree_node::is_peer_stolen(t) ) { + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } + } else if( begin == my_delay ) { + my_delay = pass; + } + return false; + } +}; + +class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > { +public: + auto_partition_type( const auto_partitioner& ) + : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() { + my_divisor *= __TBB_INITIAL_CHUNKS; + } + auto_partition_type( auto_partition_type& src, split) + : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {} + bool is_divisible() { // part of old should_execute_range() + if( my_divisor > 1 ) return true; + if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead + // keep same fragmentation while splitting for the local task pool + my_max_depth--; + my_divisor = 0; // decrease max_depth once per task + return true; + } else return false; + } + template <typename Task> + bool check_for_demand(Task& t) { + if (tree_node::is_peer_stolen(t)) { + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } else return false; + } + void spawn_task(task& t, task_group_context& ctx) { + spawn(t, ctx); + } +}; + +class simple_partition_type: public partition_type_base<simple_partition_type> { +public: + simple_partition_type( const simple_partitioner& ) {} + simple_partition_type( const simple_partition_type&, split ) {} + //! 
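Combining the constants used by auto_partition_type above: get_initial_auto_partitioner_divisor() is 4 * max_concurrency(), adaptive_mode divides that by 4 (factor is 1), and the constructor multiplies by __TBB_INITIAL_CHUNKS (2 by default), i.e. roughly two initial chunks per available thread. A sketch of the arithmetic, with the concurrency value assumed purely for illustration:

    #include <cstddef>

    // Assume max_concurrency() == 8 for illustration.
    constexpr std::size_t P = 8;
    constexpr std::size_t initial_divisor  = 4 * P;                   // get_initial_auto_partitioner_divisor()
    constexpr std::size_t adaptive_divisor = initial_divisor / 4 * 1; // adaptive_mode, factor == 1
    constexpr std::size_t auto_divisor     = adaptive_divisor * 2;    // * __TBB_INITIAL_CHUNKS
    static_assert(auto_divisor == 2 * P, "about two initial chunks per thread");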
simplified algorithm + template<typename StartType, typename Range> + void execute(StartType &start, Range &range, execution_data& ed) { + split_type split_obj = split(); // start.offer_work accepts split_type as reference + while( range.is_divisible() ) + start.offer_work( split_obj, ed ); + start.run_body( range ); + } + void spawn_task(task& t, task_group_context& ctx) { + spawn(t, ctx); + } +}; + +class static_partition_type : public linear_affinity_mode<static_partition_type> { +public: + typedef detail::proportional_split split_type; + static_partition_type( const static_partitioner& ) + : linear_affinity_mode<static_partition_type>() {} + static_partition_type( static_partition_type& p, const proportional_split& split_obj ) + : linear_affinity_mode<static_partition_type>(p, split_obj) {} +}; + +class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > { + static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units + slot_id* my_array; +public: + static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task + typedef detail::proportional_split split_type; + affinity_partition_type( affinity_partitioner_base& ap ) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() { + __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); + ap.resize(factor); + my_array = ap.my_array; + my_max_depth = factor_power + 1; + __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); + } + affinity_partition_type(affinity_partition_type& p, split) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split()) + , my_array(p.my_array) {} + affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj) + , my_array(p.my_array) {} + void note_affinity(slot_id id) { + if( my_divisor ) + my_array[my_head] = id; + } + void spawn_task(task& t, task_group_context& ctx) { + if (my_divisor) { + if (!my_array[my_head]) { + // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse + spawn(t, ctx, slot_id(my_head / factor)); + } else { + spawn(t, ctx, my_array[my_head]); + } + } else { + spawn(t, ctx); + } + } +}; + +//! A simple partitioner +/** Divides the range until the range is not divisible. + @ingroup algorithms */ +class simple_partitioner { +public: + simple_partitioner() {} +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef simple_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef simple_partition_type::split_type split_type; + + // for parallel_scan only + class partition_type { + public: + bool should_execute_range(const execution_data& ) {return false;} + partition_type( const simple_partitioner& ) {} + partition_type( const partition_type&, split ) {} + }; +}; + +//! An auto partitioner +/** The range is initial divided into several large chunks. 
+ Chunks are further subdivided into smaller pieces if demand detected and they are divisible. + @ingroup algorithms */ +class auto_partitioner { +public: + auto_partitioner() {} + +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef auto_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef auto_partition_type::split_type split_type; + + //! Backward-compatible partition for auto and affinity partition objects. + class partition_type { + size_t num_chunks; + static const size_t VICTIM_CHUNKS = 4; + public: + bool should_execute_range(const execution_data& ed) { + if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) ) + num_chunks = VICTIM_CHUNKS; + return num_chunks==1; + } + partition_type( const auto_partitioner& ) + : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} + partition_type( partition_type& pt, split ) { + num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u; + } + }; +}; + +//! A static partitioner +class static_partitioner { +public: + static_partitioner() {} +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef static_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef static_partition_type::split_type split_type; +}; + +//! 
An affinity partitioner +class affinity_partitioner : affinity_partitioner_base { +public: + affinity_partitioner() {} + +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef affinity_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef affinity_partition_type::split_type split_type; +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +// Partitioners +using detail::d1::auto_partitioner; +using detail::d1::simple_partitioner; +using detail::d1::static_partitioner; +using detail::d1::affinity_partitioner; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (pop) +#endif // warning 4244 is back + +#undef __TBB_INITIAL_CHUNKS +#undef __TBB_RANGE_POOL_CAPACITY +#undef __TBB_INIT_DEPTH + +#endif /* __TBB_partitioner_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/profiling.h b/contrib/libs/tbb/include/oneapi/tbb/profiling.h index 4b62da2060..7a90d71f60 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/profiling.h +++ b/contrib/libs/tbb/include/oneapi/tbb/profiling.h @@ -1,243 +1,243 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_profiling_H -#define __TBB_profiling_H - -#include "detail/_config.h" -#include <cstdint> - -#include <string> - -namespace tbb { -namespace detail { -inline namespace d0 { - // include list of index names - #define TBB_STRING_RESOURCE(index_name,str) index_name, - enum string_resource_index : std::uintptr_t { - #include "detail/_string_resource.h" - NUM_STRINGS - }; - #undef TBB_STRING_RESOURCE - - enum itt_relation - { - __itt_relation_is_unknown = 0, - __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ - __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ - __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ - __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ - __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ - __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ - __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ - }; - -//! Unicode support -#if (_WIN32||_WIN64) && !__MINGW32__ - //! 
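The four partitioner classes in this header are passed as the optional last argument of the parallel algorithms; a typical call site (array and bounds are placeholders) looks like:

    #include <cstddef>
    #include "oneapi/tbb/parallel_for.h"
    #include "oneapi/tbb/blocked_range.h"
    #include "oneapi/tbb/partitioner.h"

    void scale(float* a, std::size_t n) {
        tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n),
            [=](const tbb::blocked_range<std::size_t>& r) {
                for (std::size_t i = r.begin(); i != r.end(); ++i)
                    a[i] *= 2.0f;
            },
            tbb::auto_partitioner{});      // also the behavior when no partitioner is given

        // affinity_partitioner is stateful: keep one object alive and reuse it
        // across calls over the same data so the recorded affinities pay off.
        static tbb::affinity_partitioner ap;
        tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n),
            [=](const tbb::blocked_range<std::size_t>& r) {
                for (std::size_t i = r.begin(); i != r.end(); ++i)
                    a[i] += 1.0f;
            },
            ap);
    }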
Unicode character type. Always wchar_t on Windows. - using tchar = wchar_t; -#else /* !WIN */ - using tchar = char; -#endif /* !WIN */ - -} // namespace d0 -} // namespace detail -} // namespace tbb - -#include <atomic> -#if _WIN32||_WIN64 -#include <stdlib.h> /* mbstowcs_s */ -#endif -// Need these to work regardless of tools support -namespace tbb { -namespace detail { -namespace d1 { - enum notify_type {prepare=0, cancel, acquired, releasing, destroy}; - enum itt_domain_enum { ITT_DOMAIN_FLOW=0, ITT_DOMAIN_MAIN=1, ITT_DOMAIN_ALGO=2, ITT_NUM_DOMAINS }; -} // namespace d1 - -namespace r1 { - void __TBB_EXPORTED_FUNC call_itt_notify(int t, void* ptr); - void __TBB_EXPORTED_FUNC create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname); - void __TBB_EXPORTED_FUNC itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, - void* parent, unsigned long long parent_extra, string_resource_index name_index); - void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, - void* parent, unsigned long long parent_extra, string_resource_index name_index); - void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain); - void __TBB_EXPORTED_FUNC itt_set_sync_name(void* obj, const tchar* name); - void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, - string_resource_index key, const char* value); - void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, - string_resource_index key, void* value); - void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void* addr0, unsigned long long addr0_extra, - itt_relation relation, void* addr1, unsigned long long addr1_extra); - void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void* region, unsigned long long region_extra, - void* parent, unsigned long long parent_extra, string_resource_index /* name_index */); - void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void* region, unsigned long long region_extra); -} // namespace r1 - -namespace d1 { -#if TBB_USE_PROFILING_TOOLS && (_WIN32||_WIN64) && !__MINGW32__ - inline std::size_t multibyte_to_widechar(wchar_t* wcs, const char* mbs, std::size_t bufsize) { - std::size_t len; - mbstowcs_s(&len, wcs, bufsize, mbs, _TRUNCATE); - return len; // mbstowcs_s counts null terminator - } -#endif - -#if TBB_USE_PROFILING_TOOLS - inline void create_itt_sync(void *ptr, const char *objtype, const char *objname) { -#if (_WIN32||_WIN64) && !__MINGW32__ - std::size_t len_type = multibyte_to_widechar(nullptr, objtype, 0); - wchar_t *type = new wchar_t[len_type]; - multibyte_to_widechar(type, objtype, len_type); - std::size_t len_name = multibyte_to_widechar(nullptr, objname, 0); - wchar_t *name = new wchar_t[len_name]; - multibyte_to_widechar(name, objname, len_name); -#else // WIN - const char *type = objtype; - const char *name = objname; -#endif - r1::create_itt_sync(ptr, type, name); - -#if (_WIN32||_WIN64) && !__MINGW32__ - delete[] type; - delete[] name; -#endif // WIN - } - -// Distinguish notifications on task for reducing overheads -#if TBB_USE_PROFILING_TOOLS == 2 - inline void call_itt_task_notify(d1::notify_type t, void *ptr) { - r1::call_itt_notify((int)t, ptr); - } -#else - inline void call_itt_task_notify(d1::notify_type, void *) {} -#endif // TBB_USE_PROFILING_TOOLS - - inline void call_itt_notify(d1::notify_type t, void *ptr) { - 
r1::call_itt_notify((int)t, ptr); - } - -#if (_WIN32||_WIN64) && !__MINGW32__ - inline void itt_set_sync_name(void* obj, const wchar_t* name) { - r1::itt_set_sync_name(obj, name); - } - inline void itt_set_sync_name(void* obj, const char* name) { - std::size_t len_name = multibyte_to_widechar(nullptr, name, 0); - wchar_t *obj_name = new wchar_t[len_name]; - multibyte_to_widechar(obj_name, name, len_name); - r1::itt_set_sync_name(obj, obj_name); - delete[] obj_name; - } -#else - inline void itt_set_sync_name( void* obj, const char* name) { - r1::itt_set_sync_name(obj, name); - } -#endif //WIN - - inline void itt_make_task_group(itt_domain_enum domain, void* group, unsigned long long group_extra, - void* parent, unsigned long long parent_extra, string_resource_index name_index) { - r1::itt_make_task_group(domain, group, group_extra, parent, parent_extra, name_index); - } - - inline void itt_metadata_str_add( itt_domain_enum domain, void *addr, unsigned long long addr_extra, - string_resource_index key, const char *value ) { - r1::itt_metadata_str_add( domain, addr, addr_extra, key, value ); - } - - inline void register_node_addr(itt_domain_enum domain, void *addr, unsigned long long addr_extra, - string_resource_index key, void *value) { - r1::itt_metadata_ptr_add(domain, addr, addr_extra, key, value); - } - - inline void itt_relation_add( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, - itt_relation relation, void *addr1, unsigned long long addr1_extra ) { - r1::itt_relation_add( domain, addr0, addr0_extra, relation, addr1, addr1_extra ); - } - - inline void itt_task_begin( itt_domain_enum domain, void *task, unsigned long long task_extra, - void *parent, unsigned long long parent_extra, string_resource_index name_index ) { - r1::itt_task_begin( domain, task, task_extra, parent, parent_extra, name_index ); - } - - inline void itt_task_end( itt_domain_enum domain ) { - r1::itt_task_end( domain ); - } - - inline void itt_region_begin( itt_domain_enum domain, void *region, unsigned long long region_extra, - void *parent, unsigned long long parent_extra, string_resource_index name_index ) { - r1::itt_region_begin( domain, region, region_extra, parent, parent_extra, name_index ); - } - - inline void itt_region_end( itt_domain_enum domain, void *region, unsigned long long region_extra ) { - r1::itt_region_end( domain, region, region_extra ); - } -#else - inline void create_itt_sync(void* /*ptr*/, const char* /*objtype*/, const char* /*objname*/) {} - - inline void call_itt_notify(notify_type /*t*/, void* /*ptr*/) {} - - inline void call_itt_task_notify(notify_type /*t*/, void* /*ptr*/) {} -#endif // TBB_USE_PROFILING_TOOLS - -#if TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) -class event { -/** This class supports user event traces through itt. - Common use-case is tagging data flow graph tasks (data-id) - and visualization by Intel Advisor Flow Graph Analyzer (FGA) **/ -// TODO: Replace implementation by itt user event api. 
- - const std::string my_name; - - static void emit_trace(const std::string &input) { - itt_metadata_str_add( ITT_DOMAIN_FLOW, NULL, FLOW_NULL, USER_EVENT, ( "FGA::DATAID::" + input ).c_str() ); - } - -public: - event(const std::string &input) - : my_name( input ) - { } - - void emit() { - emit_trace(my_name); - } - - static void emit(const std::string &description) { - emit_trace(description); - } - -}; -#else // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) -// Using empty struct if user event tracing is disabled: -struct event { - event(const std::string &) { } - - void emit() { } - - static void emit(const std::string &) { } -}; -#endif // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) -} // namespace d1 -} // namespace detail - -namespace profiling { - using detail::d1::event; -} -} // namespace tbb - - -#endif /* __TBB_profiling_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_profiling_H +#define __TBB_profiling_H + +#include "detail/_config.h" +#include <cstdint> + +#include <string> + +namespace tbb { +namespace detail { +inline namespace d0 { + // include list of index names + #define TBB_STRING_RESOURCE(index_name,str) index_name, + enum string_resource_index : std::uintptr_t { + #include "detail/_string_resource.h" + NUM_STRINGS + }; + #undef TBB_STRING_RESOURCE + + enum itt_relation + { + __itt_relation_is_unknown = 0, + __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ + __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ + __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ + __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ + __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ + }; + +//! Unicode support +#if (_WIN32||_WIN64) && !__MINGW32__ + //! Unicode character type. Always wchar_t on Windows. 
+ using tchar = wchar_t; +#else /* !WIN */ + using tchar = char; +#endif /* !WIN */ + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#include <atomic> +#if _WIN32||_WIN64 +#include <stdlib.h> /* mbstowcs_s */ +#endif +// Need these to work regardless of tools support +namespace tbb { +namespace detail { +namespace d1 { + enum notify_type {prepare=0, cancel, acquired, releasing, destroy}; + enum itt_domain_enum { ITT_DOMAIN_FLOW=0, ITT_DOMAIN_MAIN=1, ITT_DOMAIN_ALGO=2, ITT_NUM_DOMAINS }; +} // namespace d1 + +namespace r1 { + void __TBB_EXPORTED_FUNC call_itt_notify(int t, void* ptr); + void __TBB_EXPORTED_FUNC create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname); + void __TBB_EXPORTED_FUNC itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index); + void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index); + void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain); + void __TBB_EXPORTED_FUNC itt_set_sync_name(void* obj, const tchar* name); + void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, + string_resource_index key, const char* value); + void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, + string_resource_index key, void* value); + void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void* addr0, unsigned long long addr0_extra, + itt_relation relation, void* addr1, unsigned long long addr1_extra); + void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void* region, unsigned long long region_extra, + void* parent, unsigned long long parent_extra, string_resource_index /* name_index */); + void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void* region, unsigned long long region_extra); +} // namespace r1 + +namespace d1 { +#if TBB_USE_PROFILING_TOOLS && (_WIN32||_WIN64) && !__MINGW32__ + inline std::size_t multibyte_to_widechar(wchar_t* wcs, const char* mbs, std::size_t bufsize) { + std::size_t len; + mbstowcs_s(&len, wcs, bufsize, mbs, _TRUNCATE); + return len; // mbstowcs_s counts null terminator + } +#endif + +#if TBB_USE_PROFILING_TOOLS + inline void create_itt_sync(void *ptr, const char *objtype, const char *objname) { +#if (_WIN32||_WIN64) && !__MINGW32__ + std::size_t len_type = multibyte_to_widechar(nullptr, objtype, 0); + wchar_t *type = new wchar_t[len_type]; + multibyte_to_widechar(type, objtype, len_type); + std::size_t len_name = multibyte_to_widechar(nullptr, objname, 0); + wchar_t *name = new wchar_t[len_name]; + multibyte_to_widechar(name, objname, len_name); +#else // WIN + const char *type = objtype; + const char *name = objname; +#endif + r1::create_itt_sync(ptr, type, name); + +#if (_WIN32||_WIN64) && !__MINGW32__ + delete[] type; + delete[] name; +#endif // WIN + } + +// Distinguish notifications on task for reducing overheads +#if TBB_USE_PROFILING_TOOLS == 2 + inline void call_itt_task_notify(d1::notify_type t, void *ptr) { + r1::call_itt_notify((int)t, ptr); + } +#else + inline void call_itt_task_notify(d1::notify_type, void *) {} +#endif // TBB_USE_PROFILING_TOOLS + + inline void call_itt_notify(d1::notify_type t, void *ptr) { + r1::call_itt_notify((int)t, ptr); + } + +#if 
(_WIN32||_WIN64) && !__MINGW32__ + inline void itt_set_sync_name(void* obj, const wchar_t* name) { + r1::itt_set_sync_name(obj, name); + } + inline void itt_set_sync_name(void* obj, const char* name) { + std::size_t len_name = multibyte_to_widechar(nullptr, name, 0); + wchar_t *obj_name = new wchar_t[len_name]; + multibyte_to_widechar(obj_name, name, len_name); + r1::itt_set_sync_name(obj, obj_name); + delete[] obj_name; + } +#else + inline void itt_set_sync_name( void* obj, const char* name) { + r1::itt_set_sync_name(obj, name); + } +#endif //WIN + + inline void itt_make_task_group(itt_domain_enum domain, void* group, unsigned long long group_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index) { + r1::itt_make_task_group(domain, group, group_extra, parent, parent_extra, name_index); + } + + inline void itt_metadata_str_add( itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, const char *value ) { + r1::itt_metadata_str_add( domain, addr, addr_extra, key, value ); + } + + inline void register_node_addr(itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, void *value) { + r1::itt_metadata_ptr_add(domain, addr, addr_extra, key, value); + } + + inline void itt_relation_add( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, + itt_relation relation, void *addr1, unsigned long long addr1_extra ) { + r1::itt_relation_add( domain, addr0, addr0_extra, relation, addr1, addr1_extra ); + } + + inline void itt_task_begin( itt_domain_enum domain, void *task, unsigned long long task_extra, + void *parent, unsigned long long parent_extra, string_resource_index name_index ) { + r1::itt_task_begin( domain, task, task_extra, parent, parent_extra, name_index ); + } + + inline void itt_task_end( itt_domain_enum domain ) { + r1::itt_task_end( domain ); + } + + inline void itt_region_begin( itt_domain_enum domain, void *region, unsigned long long region_extra, + void *parent, unsigned long long parent_extra, string_resource_index name_index ) { + r1::itt_region_begin( domain, region, region_extra, parent, parent_extra, name_index ); + } + + inline void itt_region_end( itt_domain_enum domain, void *region, unsigned long long region_extra ) { + r1::itt_region_end( domain, region, region_extra ); + } +#else + inline void create_itt_sync(void* /*ptr*/, const char* /*objtype*/, const char* /*objname*/) {} + + inline void call_itt_notify(notify_type /*t*/, void* /*ptr*/) {} + + inline void call_itt_task_notify(notify_type /*t*/, void* /*ptr*/) {} +#endif // TBB_USE_PROFILING_TOOLS + +#if TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +class event { +/** This class supports user event traces through itt. + Common use-case is tagging data flow graph tasks (data-id) + and visualization by Intel Advisor Flow Graph Analyzer (FGA) **/ +// TODO: Replace implementation by itt user event api. 
+ + const std::string my_name; + + static void emit_trace(const std::string &input) { + itt_metadata_str_add( ITT_DOMAIN_FLOW, NULL, FLOW_NULL, USER_EVENT, ( "FGA::DATAID::" + input ).c_str() ); + } + +public: + event(const std::string &input) + : my_name( input ) + { } + + void emit() { + emit_trace(my_name); + } + + static void emit(const std::string &description) { + emit_trace(description); + } + +}; +#else // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +// Using empty struct if user event tracing is disabled: +struct event { + event(const std::string &) { } + + void emit() { } + + static void emit(const std::string &) { } +}; +#endif // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +} // namespace d1 +} // namespace detail + +namespace profiling { + using detail::d1::event; +} +} // namespace tbb + + +#endif /* __TBB_profiling_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h index 6c3f1fe1e9..00c7443f9a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h @@ -1,197 +1,197 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_queuing_mutex_H -#define __TBB_queuing_mutex_H - -#include "detail/_namespace_injection.h" -#include "detail/_assert.h" -#include "detail/_utils.h" - -#include "profiling.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace d1 { - -//! Queuing mutex with local-only spinning. -/** @ingroup synchronization */ -class queuing_mutex { -public: - //! Construct unacquired mutex. - queuing_mutex() noexcept { - create_itt_sync(this, "tbb::queuing_mutex", ""); - }; - - queuing_mutex(const queuing_mutex&) = delete; - queuing_mutex& operator=(const queuing_mutex&) = delete; - - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock { - //! Reset fields to mean "no lock held". - void reset() { - m_mutex = nullptr; - } - - public: - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - scoped_lock() = default; - - //! Acquire lock on given mutex. - scoped_lock(queuing_mutex& m) { - acquire(m); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if (m_mutex) release(); - } - - //! No Copy - scoped_lock( const scoped_lock& ) = delete; - scoped_lock& operator=( const scoped_lock& ) = delete; - - //! Acquire lock on given mutex. - void acquire( queuing_mutex& m ) { - __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); - - // Must set all fields before the exchange, because once the - // exchange executes, *this becomes accessible to other threads. 
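The event helper above tags user data so tools such as Flow Graph Analyzer can correlate it; a minimal use, with a made-up tag string (it degrades to a no-op when user event tracing is disabled):

    #include <string>
    #include "oneapi/tbb/profiling.h"

    void tag_work(const std::string& data_id) {
        tbb::profiling::event e("DATASET::" + data_id);     // named trace object
        e.emit();                                           // emits FGA::DATAID::<name> metadata

        tbb::profiling::event::emit("one-off-" + data_id);  // static form, no object kept
    }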
- m_mutex = &m; - m_next.store(nullptr, std::memory_order_relaxed); - m_going.store(0U, std::memory_order_relaxed); - - // x86 compare exchange operation always has a strong fence - // "sending" the fields initialized above to other processors. - scoped_lock* pred = m.q_tail.exchange(this); - if (pred) { - call_itt_notify(prepare, &m); - __TBB_ASSERT(pred->m_next.load(std::memory_order_relaxed) == nullptr, "the predecessor has another successor!"); - - pred->m_next.store(this, std::memory_order_relaxed); - spin_wait_while_eq(m_going, 0U); - } - call_itt_notify(acquired, &m); - - // Force acquire so that user's critical section receives correct values - // from processor that was previously in the user's critical section. - atomic_fence(std::memory_order_acquire); - } - - //! Acquire lock on given mutex if free (i.e. non-blocking) - bool try_acquire( queuing_mutex& m ) { - __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); - - // Must set all fields before the compare_exchange_strong, because once the - // compare_exchange_strong executes, *this becomes accessible to other threads. - m_next.store(nullptr, std::memory_order_relaxed); - m_going.store(0U, std::memory_order_relaxed); - - scoped_lock* expected = nullptr; - // The compare_exchange_strong must have release semantics, because we are - // "sending" the fields initialized above to other processors. - // x86 compare exchange operation always has a strong fence - if (!m.q_tail.compare_exchange_strong(expected, this)) - return false; - - m_mutex = &m; - - // Force acquire so that user's critical section receives correct values - // from processor that was previously in the user's critical section. - atomic_fence(std::memory_order_acquire); - call_itt_notify(acquired, &m); - return true; - } - - //! Release lock. - void release() - { - __TBB_ASSERT(this->m_mutex, "no lock acquired"); - - call_itt_notify(releasing, this->m_mutex); - - if (m_next.load(std::memory_order_relaxed) == nullptr) { - scoped_lock* expected = this; - if (m_mutex->q_tail.compare_exchange_strong(expected, nullptr)) { - // this was the only item in the queue, and the queue is now empty. - reset(); - return; - } - // Someone in the queue - spin_wait_while_eq(m_next, nullptr); - } - m_next.load(std::memory_order_relaxed)->m_going.store(1U, std::memory_order_release); - - reset(); - } - - private: - //! The pointer to the mutex owned, or NULL if not holding a mutex. - queuing_mutex* m_mutex{nullptr}; - - //! The pointer to the next competitor for a mutex - std::atomic<scoped_lock*> m_next{nullptr}; - - //! The local spin-wait variable - /** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of - zero-initialization. Defining it as an entire word instead of - a byte seems to help performance slightly. */ - std::atomic<uintptr_t> m_going{0U}; - }; - - // Mutex traits - static constexpr bool is_rw_mutex = false; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = true; - -private: - //! 
The last competitor requesting the lock - std::atomic<scoped_lock*> q_tail{nullptr}; - -}; - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(queuing_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(queuing_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif //WIN -#else -inline void set_name(queuing_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(queuing_mutex&, const wchar_t*) {} -#endif //WIN -#endif -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::queuing_mutex; -} // namespace v1 -namespace profiling { - using detail::d1::set_name; -} -} // namespace tbb - -#endif /* __TBB_queuing_mutex_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_queuing_mutex_H +#define __TBB_queuing_mutex_H + +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include "profiling.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Queuing mutex with local-only spinning. +/** @ingroup synchronization */ +class queuing_mutex { +public: + //! Construct unacquired mutex. + queuing_mutex() noexcept { + create_itt_sync(this, "tbb::queuing_mutex", ""); + }; + + queuing_mutex(const queuing_mutex&) = delete; + queuing_mutex& operator=(const queuing_mutex&) = delete; + + //! The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + //! Reset fields to mean "no lock held". + void reset() { + m_mutex = nullptr; + } + + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + scoped_lock() = default; + + //! Acquire lock on given mutex. + scoped_lock(queuing_mutex& m) { + acquire(m); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if (m_mutex) release(); + } + + //! No Copy + scoped_lock( const scoped_lock& ) = delete; + scoped_lock& operator=( const scoped_lock& ) = delete; + + //! Acquire lock on given mutex. + void acquire( queuing_mutex& m ) { + __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); + + // Must set all fields before the exchange, because once the + // exchange executes, *this becomes accessible to other threads. + m_mutex = &m; + m_next.store(nullptr, std::memory_order_relaxed); + m_going.store(0U, std::memory_order_relaxed); + + // x86 compare exchange operation always has a strong fence + // "sending" the fields initialized above to other processors. 
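The set_name hooks in this header give profiling tools a readable label for a mutex object; for example (the label is arbitrary):

    #include "oneapi/tbb/queuing_mutex.h"

    tbb::queuing_mutex cache_mutex;

    void name_for_tools() {
        // No-op unless TBB_USE_PROFILING_TOOLS is enabled at build time.
        tbb::profiling::set_name(cache_mutex, "cache_mutex");
    }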
+ scoped_lock* pred = m.q_tail.exchange(this); + if (pred) { + call_itt_notify(prepare, &m); + __TBB_ASSERT(pred->m_next.load(std::memory_order_relaxed) == nullptr, "the predecessor has another successor!"); + + pred->m_next.store(this, std::memory_order_relaxed); + spin_wait_while_eq(m_going, 0U); + } + call_itt_notify(acquired, &m); + + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + } + + //! Acquire lock on given mutex if free (i.e. non-blocking) + bool try_acquire( queuing_mutex& m ) { + __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); + + // Must set all fields before the compare_exchange_strong, because once the + // compare_exchange_strong executes, *this becomes accessible to other threads. + m_next.store(nullptr, std::memory_order_relaxed); + m_going.store(0U, std::memory_order_relaxed); + + scoped_lock* expected = nullptr; + // The compare_exchange_strong must have release semantics, because we are + // "sending" the fields initialized above to other processors. + // x86 compare exchange operation always has a strong fence + if (!m.q_tail.compare_exchange_strong(expected, this)) + return false; + + m_mutex = &m; + + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + call_itt_notify(acquired, &m); + return true; + } + + //! Release lock. + void release() + { + __TBB_ASSERT(this->m_mutex, "no lock acquired"); + + call_itt_notify(releasing, this->m_mutex); + + if (m_next.load(std::memory_order_relaxed) == nullptr) { + scoped_lock* expected = this; + if (m_mutex->q_tail.compare_exchange_strong(expected, nullptr)) { + // this was the only item in the queue, and the queue is now empty. + reset(); + return; + } + // Someone in the queue + spin_wait_while_eq(m_next, nullptr); + } + m_next.load(std::memory_order_relaxed)->m_going.store(1U, std::memory_order_release); + + reset(); + } + + private: + //! The pointer to the mutex owned, or NULL if not holding a mutex. + queuing_mutex* m_mutex{nullptr}; + + //! The pointer to the next competitor for a mutex + std::atomic<scoped_lock*> m_next{nullptr}; + + //! The local spin-wait variable + /** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of + zero-initialization. Defining it as an entire word instead of + a byte seems to help performance slightly. */ + std::atomic<uintptr_t> m_going{0U}; + }; + + // Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = true; + +private: + //! 
The last competitor requesting the lock + std::atomic<scoped_lock*> q_tail{nullptr}; + +}; + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(queuing_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(queuing_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_mutex&, const wchar_t*) {} +#endif //WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::queuing_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#endif /* __TBB_queuing_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h index 6bb748f8a3..502e7997df 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h @@ -1,199 +1,199 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_queuing_rw_mutex_H -#define __TBB_queuing_rw_mutex_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_assert.h" - -#include "profiling.h" - -#include <cstring> -#include <atomic> - -namespace tbb { -namespace detail { -namespace r1 { -struct queuing_rw_mutex_impl; -} -namespace d1 { - -//! Queuing reader-writer mutex with local-only spinning. -/** Adapted from Krieger, Stumm, et al. pseudocode at - https://www.researchgate.net/publication/221083709_A_Fair_Fast_Scalable_Reader-Writer_Lock - @ingroup synchronization */ -class queuing_rw_mutex { - friend r1::queuing_rw_mutex_impl; -public: - //! Construct unacquired mutex. - queuing_rw_mutex() noexcept { - create_itt_sync(this, "tbb::queuing_rw_mutex", ""); - } - - //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL - ~queuing_rw_mutex() { - __TBB_ASSERT(q_tail.load(std::memory_order_relaxed) == nullptr, "destruction of an acquired mutex"); - } - - //! No Copy - queuing_rw_mutex(const queuing_rw_mutex&) = delete; - queuing_rw_mutex& operator=(const queuing_rw_mutex&) = delete; - - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock { - friend r1::queuing_rw_mutex_impl; - //! Initialize fields to mean "no lock held". - void initialize() { - my_mutex = nullptr; - my_internal_lock.store(0, std::memory_order_relaxed); - my_going.store(0, std::memory_order_relaxed); -#if TBB_USE_ASSERT - my_state = 0xFF; // Set to invalid state - my_next.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); - my_prev.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); -#endif /* TBB_USE_ASSERT */ - } - - public: - //! 
Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - scoped_lock() {initialize();} - - //! Acquire lock on given mutex. - scoped_lock( queuing_rw_mutex& m, bool write=true ) { - initialize(); - acquire(m,write); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if( my_mutex ) release(); - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - void acquire( queuing_rw_mutex& m, bool write=true ); - - //! Acquire lock on given mutex if free (i.e. non-blocking) - bool try_acquire( queuing_rw_mutex& m, bool write=true ); - - //! Release lock. - void release(); - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - bool upgrade_to_writer(); - - //! Downgrade writer to become a reader. - bool downgrade_to_reader(); - - private: - //! The pointer to the mutex owned, or NULL if not holding a mutex. - queuing_rw_mutex* my_mutex; - - //! The 'pointer' to the previous and next competitors for a mutex - std::atomic<uintptr_t> my_prev; - std::atomic<uintptr_t> my_next; - - using state_t = unsigned char ; - - //! State of the request: reader, writer, active reader, other service states - std::atomic<state_t> my_state; - - //! The local spin-wait variable - /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */ - std::atomic<unsigned char> my_going; - - //! A tiny internal lock - std::atomic<unsigned char> my_internal_lock; - }; - - // Mutex traits - static constexpr bool is_rw_mutex = true; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = true; - -private: - //! 
The last competitor requesting the lock - std::atomic<scoped_lock*> q_tail{nullptr}; -}; -#if TBB_USE_PROFILING_TOOLS -inline void set_name(queuing_rw_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(queuing_rw_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif //WIN -#else -inline void set_name(queuing_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(queuing_rw_mutex&, const wchar_t*) {} -#endif //WIN -#endif -} // namespace d1 - -namespace r1 { -void acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); -bool try_acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); -void release(d1::queuing_rw_mutex::scoped_lock&); -bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock&); -bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock&); -} // namespace r1 - -namespace d1 { - - -inline void queuing_rw_mutex::scoped_lock::acquire(queuing_rw_mutex& m,bool write) { - r1::acquire(m, *this, write); -} - -inline bool queuing_rw_mutex::scoped_lock::try_acquire(queuing_rw_mutex& m, bool write) { - return r1::try_acquire(m, *this, write); -} - -inline void queuing_rw_mutex::scoped_lock::release() { - r1::release(*this); -} - -inline bool queuing_rw_mutex::scoped_lock::upgrade_to_writer() { - return r1::upgrade_to_writer(*this); -} - -inline bool queuing_rw_mutex::scoped_lock::downgrade_to_reader() { - return r1::downgrade_to_reader(*this); -} -} // namespace d1 - -} // namespace detail - -inline namespace v1 { -using detail::d1::queuing_rw_mutex; -} // namespace v1 -namespace profiling { - using detail::d1::set_name; -} -} // namespace tbb - -#endif /* __TBB_queuing_rw_mutex_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_queuing_rw_mutex_H +#define __TBB_queuing_rw_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" + +#include "profiling.h" + +#include <cstring> +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { +struct queuing_rw_mutex_impl; +} +namespace d1 { + +//! Queuing reader-writer mutex with local-only spinning. +/** Adapted from Krieger, Stumm, et al. pseudocode at + https://www.researchgate.net/publication/221083709_A_Fair_Fast_Scalable_Reader-Writer_Lock + @ingroup synchronization */ +class queuing_rw_mutex { + friend r1::queuing_rw_mutex_impl; +public: + //! Construct unacquired mutex. + queuing_rw_mutex() noexcept { + create_itt_sync(this, "tbb::queuing_rw_mutex", ""); + } + + //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL + ~queuing_rw_mutex() { + __TBB_ASSERT(q_tail.load(std::memory_order_relaxed) == nullptr, "destruction of an acquired mutex"); + } + + //! No Copy + queuing_rw_mutex(const queuing_rw_mutex&) = delete; + queuing_rw_mutex& operator=(const queuing_rw_mutex&) = delete; + + //! 
The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + friend r1::queuing_rw_mutex_impl; + //! Initialize fields to mean "no lock held". + void initialize() { + my_mutex = nullptr; + my_internal_lock.store(0, std::memory_order_relaxed); + my_going.store(0, std::memory_order_relaxed); +#if TBB_USE_ASSERT + my_state = 0xFF; // Set to invalid state + my_next.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); + my_prev.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); +#endif /* TBB_USE_ASSERT */ + } + + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + scoped_lock() {initialize();} + + //! Acquire lock on given mutex. + scoped_lock( queuing_rw_mutex& m, bool write=true ) { + initialize(); + acquire(m,write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if( my_mutex ) release(); + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire( queuing_rw_mutex& m, bool write=true ); + + //! Acquire lock on given mutex if free (i.e. non-blocking) + bool try_acquire( queuing_rw_mutex& m, bool write=true ); + + //! Release lock. + void release(); + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade_to_writer(); + + //! Downgrade writer to become a reader. + bool downgrade_to_reader(); + + private: + //! The pointer to the mutex owned, or NULL if not holding a mutex. + queuing_rw_mutex* my_mutex; + + //! The 'pointer' to the previous and next competitors for a mutex + std::atomic<uintptr_t> my_prev; + std::atomic<uintptr_t> my_next; + + using state_t = unsigned char ; + + //! State of the request: reader, writer, active reader, other service states + std::atomic<state_t> my_state; + + //! The local spin-wait variable + /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */ + std::atomic<unsigned char> my_going; + + //! A tiny internal lock + std::atomic<unsigned char> my_internal_lock; + }; + + // Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = true; + +private: + //! 
The last competitor requesting the lock + std::atomic<scoped_lock*> q_tail{nullptr}; +}; +#if TBB_USE_PROFILING_TOOLS +inline void set_name(queuing_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(queuing_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_rw_mutex&, const wchar_t*) {} +#endif //WIN +#endif +} // namespace d1 + +namespace r1 { +void acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); +bool try_acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); +void release(d1::queuing_rw_mutex::scoped_lock&); +bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock&); +bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock&); +} // namespace r1 + +namespace d1 { + + +inline void queuing_rw_mutex::scoped_lock::acquire(queuing_rw_mutex& m,bool write) { + r1::acquire(m, *this, write); +} + +inline bool queuing_rw_mutex::scoped_lock::try_acquire(queuing_rw_mutex& m, bool write) { + return r1::try_acquire(m, *this, write); +} + +inline void queuing_rw_mutex::scoped_lock::release() { + r1::release(*this); +} + +inline bool queuing_rw_mutex::scoped_lock::upgrade_to_writer() { + return r1::upgrade_to_writer(*this); +} + +inline bool queuing_rw_mutex::scoped_lock::downgrade_to_reader() { + return r1::downgrade_to_reader(*this); +} +} // namespace d1 + +} // namespace detail + +inline namespace v1 { +using detail::d1::queuing_rw_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#endif /* __TBB_queuing_rw_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h index daab02f324..20addb3453 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h @@ -1,332 +1,332 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_scalable_allocator_H -#define __TBB_scalable_allocator_H - -#ifdef __cplusplus -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/detail/_utils.h" -#include <cstdlib> -#include <utility> -#else -#include <stddef.h> /* Need ptrdiff_t and size_t from here. */ -#if !_MSC_VER -#include <stdint.h> /* Need intptr_t from here. */ -#endif -#endif - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -#error #include <memory_resource> -#endif - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#if _MSC_VER - #define __TBB_EXPORTED_FUNC __cdecl -#else - #define __TBB_EXPORTED_FUNC -#endif - -/** The "malloc" analogue to allocate block of memory of size bytes. - * @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_malloc(size_t size); - -/** The "free" analogue to discard a previously allocated piece of memory. 
- @ingroup memory_allocation */ -void __TBB_EXPORTED_FUNC scalable_free(void* ptr); - -/** The "realloc" analogue complementing scalable_malloc. - @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_realloc(void* ptr, size_t size); - -/** The "calloc" analogue complementing scalable_malloc. - @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_calloc(size_t nobj, size_t size); - -/** The "posix_memalign" analogue. - @ingroup memory_allocation */ -int __TBB_EXPORTED_FUNC scalable_posix_memalign(void** memptr, size_t alignment, size_t size); - -/** The "_aligned_malloc" analogue. - @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_aligned_malloc(size_t size, size_t alignment); - -/** The "_aligned_realloc" analogue. - @ingroup memory_allocation */ -void* __TBB_EXPORTED_FUNC scalable_aligned_realloc(void* ptr, size_t size, size_t alignment); - -/** The "_aligned_free" analogue. - @ingroup memory_allocation */ -void __TBB_EXPORTED_FUNC scalable_aligned_free(void* ptr); - -/** The analogue of _msize/malloc_size/malloc_usable_size. - Returns the usable size of a memory block previously allocated by scalable_*, - or 0 (zero) if ptr does not point to such a block. - @ingroup memory_allocation */ -size_t __TBB_EXPORTED_FUNC scalable_msize(void* ptr); - -/* Results for scalable_allocation_* functions */ -typedef enum { - TBBMALLOC_OK, - TBBMALLOC_INVALID_PARAM, - TBBMALLOC_UNSUPPORTED, - TBBMALLOC_NO_MEMORY, - TBBMALLOC_NO_EFFECT -} ScalableAllocationResult; - -/* Setting TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages. - scalable_allocation_mode call has priority over environment variable. */ -typedef enum { - TBBMALLOC_USE_HUGE_PAGES, /* value turns using huge pages on and off */ - /* deprecated, kept for backward compatibility only */ - USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES, - /* try to limit memory consumption value (Bytes), clean internal buffers - if limit is exceeded, but not prevents from requesting memory from OS */ - TBBMALLOC_SET_SOFT_HEAP_LIMIT, - /* Lower bound for the size (Bytes), that is interpreted as huge - * and not released during regular cleanup operations. */ - TBBMALLOC_SET_HUGE_SIZE_THRESHOLD -} AllocationModeParam; - -/** Set TBB allocator-specific allocation modes. - @ingroup memory_allocation */ -int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value); - -typedef enum { - /* Clean internal allocator buffers for all threads. - Returns TBBMALLOC_NO_EFFECT if no buffers cleaned, - TBBMALLOC_OK if some memory released from buffers. */ - TBBMALLOC_CLEAN_ALL_BUFFERS, - /* Clean internal allocator buffer for current thread only. - Return values same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */ - TBBMALLOC_CLEAN_THREAD_BUFFERS -} ScalableAllocationCmd; - -/** Call TBB allocator-specific commands. - @ingroup memory_allocation */ -int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param); - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ - -#ifdef __cplusplus - -//! The namespace rml contains components of low-level memory pool interface. -namespace rml { -class MemoryPool; - -typedef void *(*rawAllocType)(std::intptr_t pool_id, std::size_t &bytes); -// returns non-zero in case of error -typedef int (*rawFreeType)(std::intptr_t pool_id, void* raw_ptr, std::size_t raw_bytes); - -struct MemPoolPolicy { - enum { - TBBMALLOC_POOL_VERSION = 1 - }; - - rawAllocType pAlloc; - rawFreeType pFree; - // granularity of pAlloc allocations. 0 means default used. 
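    // (raw memory regions requested through pAlloc are presumably sized in multiples of this value)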
- std::size_t granularity; - int version; - // all memory consumed at 1st pAlloc call and never returned, - // no more pAlloc calls after 1st - unsigned fixedPool : 1, - // memory consumed but returned only at pool termination - keepAllMemory : 1, - reserved : 30; - - MemPoolPolicy(rawAllocType pAlloc_, rawFreeType pFree_, - std::size_t granularity_ = 0, bool fixedPool_ = false, - bool keepAllMemory_ = false) : - pAlloc(pAlloc_), pFree(pFree_), granularity(granularity_), version(TBBMALLOC_POOL_VERSION), - fixedPool(fixedPool_), keepAllMemory(keepAllMemory_), - reserved(0) {} -}; - -// enums have same values as appropriate enums from ScalableAllocationResult -// TODO: use ScalableAllocationResult in pool_create directly -enum MemPoolError { - // pool created successfully - POOL_OK = TBBMALLOC_OK, - // invalid policy parameters found - INVALID_POLICY = TBBMALLOC_INVALID_PARAM, - // requested pool policy is not supported by allocator library - UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED, - // lack of memory during pool creation - NO_MEMORY = TBBMALLOC_NO_MEMORY, - // action takes no effect - NO_EFFECT = TBBMALLOC_NO_EFFECT -}; - -MemPoolError pool_create_v1(std::intptr_t pool_id, const MemPoolPolicy *policy, - rml::MemoryPool **pool); - -bool pool_destroy(MemoryPool* memPool); -void *pool_malloc(MemoryPool* memPool, std::size_t size); -void *pool_realloc(MemoryPool* memPool, void *object, std::size_t size); -void *pool_aligned_malloc(MemoryPool* mPool, std::size_t size, std::size_t alignment); -void *pool_aligned_realloc(MemoryPool* mPool, void *ptr, std::size_t size, std::size_t alignment); -bool pool_reset(MemoryPool* memPool); -bool pool_free(MemoryPool *memPool, void *object); -MemoryPool *pool_identify(void *object); -std::size_t pool_msize(MemoryPool *memPool, void *object); - -} // namespace rml - -namespace tbb { -namespace detail { -namespace d1 { - -// keep throw in a separate function to prevent code bloat -template<typename E> -void throw_exception(const E &e) { -#if TBB_USE_EXCEPTIONS - throw e; -#else - suppress_unused_warning(e); -#endif -} - -template<typename T> -class scalable_allocator { -public: - using value_type = T; - using propagate_on_container_move_assignment = std::true_type; - - //! Always defined for TBB containers - using is_always_equal = std::true_type; - - scalable_allocator() = default; - template<typename U> scalable_allocator(const scalable_allocator<U>&) noexcept {} - - //! Allocate space for n objects. - __TBB_nodiscard T* allocate(std::size_t n) { - T* p = static_cast<T*>(scalable_malloc(n * sizeof(value_type))); - if (!p) { - throw_exception(std::bad_alloc()); - } - return p; - } - - //! Free previously allocated block of memory - void deallocate(T* p, std::size_t) { - scalable_free(p); - } - -#if TBB_ALLOCATOR_TRAITS_BROKEN - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using difference_type = std::ptrdiff_t; - using size_type = std::size_t; - template<typename U> struct rebind { - using other = scalable_allocator<U>; - }; - //! Largest value for which method allocate might succeed. - size_type max_size() const noexcept { - size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type); - return (absolutemax > 0 ? absolutemax : 1); - } - template<typename U, typename... Args> - void construct(U *p, Args&&... 
args) - { ::new((void *)p) U(std::forward<Args>(args)...); } - void destroy(pointer p) { p->~value_type(); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } -#endif // TBB_ALLOCATOR_TRAITS_BROKEN - -}; - -#if TBB_ALLOCATOR_TRAITS_BROKEN - template<> - class scalable_allocator<void> { - public: - using pointer = void*; - using const_pointer = const void*; - using value_type = void; - template<typename U> struct rebind { - using other = scalable_allocator<U>; - }; - }; -#endif - -template<typename T, typename U> -inline bool operator==(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return true; } - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template<typename T, typename U> -inline bool operator!=(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return false; } -#endif - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT - -//! C++17 memory resource implementation for scalable allocator -//! ISO C++ Section 23.12.2 -class scalable_resource_impl : public std::pmr::memory_resource { -private: - void* do_allocate(std::size_t bytes, std::size_t alignment) override { - void* p = scalable_aligned_malloc(bytes, alignment); - if (!p) { - throw_exception(std::bad_alloc()); - } - return p; - } - - void do_deallocate(void* ptr, std::size_t /*bytes*/, std::size_t /*alignment*/) override { - scalable_free(ptr); - } - - //! Memory allocated by one instance of scalable_resource_impl could be deallocated by any - //! other instance of this class - bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { - return this == &other || -#if __TBB_USE_OPTIONAL_RTTI - dynamic_cast<const scalable_resource_impl*>(&other) != nullptr; -#else - false; -#endif - } -}; - -//! Global scalable allocator memory resource provider -inline std::pmr::memory_resource* scalable_memory_resource() noexcept { - static tbb::detail::d1::scalable_resource_impl scalable_res; - return &scalable_res; -} - -#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::scalable_allocator; -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -using detail::d1::scalable_memory_resource; -#endif -} // namespace v1 - -} // namespace tbb - -#endif /* __cplusplus */ - -#endif /* __TBB_scalable_allocator_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_scalable_allocator_H +#define __TBB_scalable_allocator_H + +#ifdef __cplusplus +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" +#include <cstdlib> +#include <utility> +#else +#include <stddef.h> /* Need ptrdiff_t and size_t from here. */ +#if !_MSC_VER +#include <stdint.h> /* Need intptr_t from here. 
*/ +#endif +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#if _MSC_VER + #define __TBB_EXPORTED_FUNC __cdecl +#else + #define __TBB_EXPORTED_FUNC +#endif + +/** The "malloc" analogue to allocate block of memory of size bytes. + * @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_malloc(size_t size); + +/** The "free" analogue to discard a previously allocated piece of memory. + @ingroup memory_allocation */ +void __TBB_EXPORTED_FUNC scalable_free(void* ptr); + +/** The "realloc" analogue complementing scalable_malloc. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_realloc(void* ptr, size_t size); + +/** The "calloc" analogue complementing scalable_malloc. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_calloc(size_t nobj, size_t size); + +/** The "posix_memalign" analogue. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_posix_memalign(void** memptr, size_t alignment, size_t size); + +/** The "_aligned_malloc" analogue. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_aligned_malloc(size_t size, size_t alignment); + +/** The "_aligned_realloc" analogue. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_aligned_realloc(void* ptr, size_t size, size_t alignment); + +/** The "_aligned_free" analogue. + @ingroup memory_allocation */ +void __TBB_EXPORTED_FUNC scalable_aligned_free(void* ptr); + +/** The analogue of _msize/malloc_size/malloc_usable_size. + Returns the usable size of a memory block previously allocated by scalable_*, + or 0 (zero) if ptr does not point to such a block. + @ingroup memory_allocation */ +size_t __TBB_EXPORTED_FUNC scalable_msize(void* ptr); + +/* Results for scalable_allocation_* functions */ +typedef enum { + TBBMALLOC_OK, + TBBMALLOC_INVALID_PARAM, + TBBMALLOC_UNSUPPORTED, + TBBMALLOC_NO_MEMORY, + TBBMALLOC_NO_EFFECT +} ScalableAllocationResult; + +/* Setting TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages. + scalable_allocation_mode call has priority over environment variable. */ +typedef enum { + TBBMALLOC_USE_HUGE_PAGES, /* value turns using huge pages on and off */ + /* deprecated, kept for backward compatibility only */ + USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES, + /* try to limit memory consumption value (Bytes), clean internal buffers + if limit is exceeded, but not prevents from requesting memory from OS */ + TBBMALLOC_SET_SOFT_HEAP_LIMIT, + /* Lower bound for the size (Bytes), that is interpreted as huge + * and not released during regular cleanup operations. */ + TBBMALLOC_SET_HUGE_SIZE_THRESHOLD +} AllocationModeParam; + +/** Set TBB allocator-specific allocation modes. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value); + +typedef enum { + /* Clean internal allocator buffers for all threads. + Returns TBBMALLOC_NO_EFFECT if no buffers cleaned, + TBBMALLOC_OK if some memory released from buffers. */ + TBBMALLOC_CLEAN_ALL_BUFFERS, + /* Clean internal allocator buffer for current thread only. + Return values same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */ + TBBMALLOC_CLEAN_THREAD_BUFFERS +} ScalableAllocationCmd; + +/** Call TBB allocator-specific commands. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#ifdef __cplusplus + +//! 
The namespace rml contains components of low-level memory pool interface. +namespace rml { +class MemoryPool; + +typedef void *(*rawAllocType)(std::intptr_t pool_id, std::size_t &bytes); +// returns non-zero in case of error +typedef int (*rawFreeType)(std::intptr_t pool_id, void* raw_ptr, std::size_t raw_bytes); + +struct MemPoolPolicy { + enum { + TBBMALLOC_POOL_VERSION = 1 + }; + + rawAllocType pAlloc; + rawFreeType pFree; + // granularity of pAlloc allocations. 0 means default used. + std::size_t granularity; + int version; + // all memory consumed at 1st pAlloc call and never returned, + // no more pAlloc calls after 1st + unsigned fixedPool : 1, + // memory consumed but returned only at pool termination + keepAllMemory : 1, + reserved : 30; + + MemPoolPolicy(rawAllocType pAlloc_, rawFreeType pFree_, + std::size_t granularity_ = 0, bool fixedPool_ = false, + bool keepAllMemory_ = false) : + pAlloc(pAlloc_), pFree(pFree_), granularity(granularity_), version(TBBMALLOC_POOL_VERSION), + fixedPool(fixedPool_), keepAllMemory(keepAllMemory_), + reserved(0) {} +}; + +// enums have same values as appropriate enums from ScalableAllocationResult +// TODO: use ScalableAllocationResult in pool_create directly +enum MemPoolError { + // pool created successfully + POOL_OK = TBBMALLOC_OK, + // invalid policy parameters found + INVALID_POLICY = TBBMALLOC_INVALID_PARAM, + // requested pool policy is not supported by allocator library + UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED, + // lack of memory during pool creation + NO_MEMORY = TBBMALLOC_NO_MEMORY, + // action takes no effect + NO_EFFECT = TBBMALLOC_NO_EFFECT +}; + +MemPoolError pool_create_v1(std::intptr_t pool_id, const MemPoolPolicy *policy, + rml::MemoryPool **pool); + +bool pool_destroy(MemoryPool* memPool); +void *pool_malloc(MemoryPool* memPool, std::size_t size); +void *pool_realloc(MemoryPool* memPool, void *object, std::size_t size); +void *pool_aligned_malloc(MemoryPool* mPool, std::size_t size, std::size_t alignment); +void *pool_aligned_realloc(MemoryPool* mPool, void *ptr, std::size_t size, std::size_t alignment); +bool pool_reset(MemoryPool* memPool); +bool pool_free(MemoryPool *memPool, void *object); +MemoryPool *pool_identify(void *object); +std::size_t pool_msize(MemoryPool *memPool, void *object); + +} // namespace rml + +namespace tbb { +namespace detail { +namespace d1 { + +// keep throw in a separate function to prevent code bloat +template<typename E> +void throw_exception(const E &e) { +#if TBB_USE_EXCEPTIONS + throw e; +#else + suppress_unused_warning(e); +#endif +} + +template<typename T> +class scalable_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers + using is_always_equal = std::true_type; + + scalable_allocator() = default; + template<typename U> scalable_allocator(const scalable_allocator<U>&) noexcept {} + + //! Allocate space for n objects. + __TBB_nodiscard T* allocate(std::size_t n) { + T* p = static_cast<T*>(scalable_malloc(n * sizeof(value_type))); + if (!p) { + throw_exception(std::bad_alloc()); + } + return p; + } + + //! 
Free previously allocated block of memory + void deallocate(T* p, std::size_t) { + scalable_free(p); + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = scalable_allocator<U>; + }; + //! Largest value for which method allocate might succeed. + size_type max_size() const noexcept { + size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type); + return (absolutemax > 0 ? absolutemax : 1); + } + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new((void *)p) U(std::forward<Args>(args)...); } + void destroy(pointer p) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN + +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class scalable_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = scalable_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +inline bool operator==(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +inline bool operator!=(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return false; } +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +//! C++17 memory resource implementation for scalable allocator +//! ISO C++ Section 23.12.2 +class scalable_resource_impl : public std::pmr::memory_resource { +private: + void* do_allocate(std::size_t bytes, std::size_t alignment) override { + void* p = scalable_aligned_malloc(bytes, alignment); + if (!p) { + throw_exception(std::bad_alloc()); + } + return p; + } + + void do_deallocate(void* ptr, std::size_t /*bytes*/, std::size_t /*alignment*/) override { + scalable_free(ptr); + } + + //! Memory allocated by one instance of scalable_resource_impl could be deallocated by any + //! other instance of this class + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + return this == &other || +#if __TBB_USE_OPTIONAL_RTTI + dynamic_cast<const scalable_resource_impl*>(&other) != nullptr; +#else + false; +#endif + } +}; + +//! 
Global scalable allocator memory resource provider +inline std::pmr::memory_resource* scalable_memory_resource() noexcept { + static tbb::detail::d1::scalable_resource_impl scalable_res; + return &scalable_res; +} + +#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::scalable_allocator; +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +using detail::d1::scalable_memory_resource; +#endif +} // namespace v1 + +} // namespace tbb + +#endif /* __cplusplus */ + +#endif /* __TBB_scalable_allocator_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h index 7fde7e15af..aa9bcb6fd6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h @@ -1,179 +1,179 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_spin_mutex_H -#define __TBB_spin_mutex_H - -#include "detail/_namespace_injection.h" - -#include "profiling.h" - -#include "detail/_assert.h" -#include "detail/_utils.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace d1 { - -#if __TBB_TSX_INTRINSICS_PRESENT -class rtm_mutex; -#endif - -/** A spin_mutex is a low-level synchronization primitive. - While locked, it causes the waiting threads to spin in a loop until the lock is released. - It should be used only for locking short critical sections - (typically less than 20 instructions) when fairness is not an issue. - If zero-initialized, the mutex is considered unheld. - @ingroup synchronization */ -class spin_mutex { -public: - //! Constructors - spin_mutex() noexcept : m_flag(false) { - create_itt_sync(this, "tbb::spin_mutex", ""); - }; - - //! Destructor - ~spin_mutex() = default; - - //! No Copy - spin_mutex(const spin_mutex&) = delete; - spin_mutex& operator=(const spin_mutex&) = delete; - - //! Represents acquisition of a mutex. - class scoped_lock { - //! Points to currently held mutex, or NULL if no lock is held. - spin_mutex* m_mutex; - - public: - //! Construct without acquiring a mutex. - constexpr scoped_lock() noexcept : m_mutex(nullptr) {} - - //! Construct and acquire lock on a mutex. - scoped_lock(spin_mutex& m) { - acquire(m); - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock. - void acquire(spin_mutex& m) { - m_mutex = &m; - m.lock(); - } - - //! Try acquiring lock (non-blocking) - /** Return true if lock acquired; false otherwise. */ - bool try_acquire(spin_mutex& m) { - bool result = m.try_lock(); - if (result) { - m_mutex = &m; - } - return result; - } - - //! Release lock - void release() { - __TBB_ASSERT(m_mutex, "release on spin_mutex::scoped_lock that is not holding a lock"); - m_mutex->unlock(); - m_mutex = nullptr; - } - - //! Destroy lock. If holding a lock, releases the lock first. - ~scoped_lock() { - if (m_mutex) { - release(); - } - } - }; - - //! 
Mutex traits - static constexpr bool is_rw_mutex = false; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = false; - - //! Acquire lock - /** Spin if the lock is taken */ - void lock() { - atomic_backoff backoff; - call_itt_notify(prepare, this); - while (m_flag.exchange(true)) backoff.pause(); - call_itt_notify(acquired, this); - } - - //! Try acquiring lock (non-blocking) - /** Return true if lock acquired; false otherwise. */ - bool try_lock() { - bool result = !m_flag.exchange(true); - if (result) { - call_itt_notify(acquired, this); - } - return result; - } - - //! Release lock - void unlock() { - call_itt_notify(releasing, this); - m_flag.store(false, std::memory_order_release); - } - -protected: - std::atomic<bool> m_flag; -}; // class spin_mutex - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(spin_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(spin_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif //WIN -#else -inline void set_name(spin_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(spin_mutex&, const wchar_t*) {} -#endif // WIN -#endif -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::spin_mutex; -} // namespace v1 -namespace profiling { - using detail::d1::set_name; -} -} // namespace tbb - -#include "detail/_rtm_mutex.h" - -namespace tbb { -inline namespace v1 { -#if __TBB_TSX_INTRINSICS_PRESENT - using speculative_spin_mutex = detail::d1::rtm_mutex; -#else - using speculative_spin_mutex = detail::d1::spin_mutex; -#endif -} -} - -#endif /* __TBB_spin_mutex_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_spin_mutex_H +#define __TBB_spin_mutex_H + +#include "detail/_namespace_injection.h" + +#include "profiling.h" + +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +#if __TBB_TSX_INTRINSICS_PRESENT +class rtm_mutex; +#endif + +/** A spin_mutex is a low-level synchronization primitive. + While locked, it causes the waiting threads to spin in a loop until the lock is released. + It should be used only for locking short critical sections + (typically less than 20 instructions) when fairness is not an issue. + If zero-initialized, the mutex is considered unheld. + @ingroup synchronization */ +class spin_mutex { +public: + //! Constructors + spin_mutex() noexcept : m_flag(false) { + create_itt_sync(this, "tbb::spin_mutex", ""); + }; + + //! Destructor + ~spin_mutex() = default; + + //! No Copy + spin_mutex(const spin_mutex&) = delete; + spin_mutex& operator=(const spin_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + //! Points to currently held mutex, or NULL if no lock is held. + spin_mutex* m_mutex; + + public: + //! Construct without acquiring a mutex. 
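        /** Equivalent to zero-initialization of *this; acquire() or try_acquire() can be called later. */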
+ constexpr scoped_lock() noexcept : m_mutex(nullptr) {} + + //! Construct and acquire lock on a mutex. + scoped_lock(spin_mutex& m) { + acquire(m); + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock. + void acquire(spin_mutex& m) { + m_mutex = &m; + m.lock(); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_acquire(spin_mutex& m) { + bool result = m.try_lock(); + if (result) { + m_mutex = &m; + } + return result; + } + + //! Release lock + void release() { + __TBB_ASSERT(m_mutex, "release on spin_mutex::scoped_lock that is not holding a lock"); + m_mutex->unlock(); + m_mutex = nullptr; + } + + //! Destroy lock. If holding a lock, releases the lock first. + ~scoped_lock() { + if (m_mutex) { + release(); + } + } + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + + //! Acquire lock + /** Spin if the lock is taken */ + void lock() { + atomic_backoff backoff; + call_itt_notify(prepare, this); + while (m_flag.exchange(true)) backoff.pause(); + call_itt_notify(acquired, this); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_lock() { + bool result = !m_flag.exchange(true); + if (result) { + call_itt_notify(acquired, this); + } + return result; + } + + //! Release lock + void unlock() { + call_itt_notify(releasing, this); + m_flag.store(false, std::memory_order_release); + } + +protected: + std::atomic<bool> m_flag; +}; // class spin_mutex + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(spin_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(spin_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_mutex&, const wchar_t*) {} +#endif // WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::spin_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#include "detail/_rtm_mutex.h" + +namespace tbb { +inline namespace v1 { +#if __TBB_TSX_INTRINSICS_PRESENT + using speculative_spin_mutex = detail::d1::rtm_mutex; +#else + using speculative_spin_mutex = detail::d1::spin_mutex; +#endif +} +} + +#endif /* __TBB_spin_mutex_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h index baf6b24b56..d3a43be30a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h @@ -1,307 +1,307 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef __TBB_spin_rw_mutex_H -#define __TBB_spin_rw_mutex_H - -#include "detail/_namespace_injection.h" - -#include "profiling.h" - -#include "detail/_assert.h" -#include "detail/_utils.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace d1 { - -#if __TBB_TSX_INTRINSICS_PRESENT -class rtm_rw_mutex; -#endif - -//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference -/** @ingroup synchronization */ -class spin_rw_mutex { -public: - //! Constructors - spin_rw_mutex() noexcept : m_state(0) { - create_itt_sync(this, "tbb::spin_rw_mutex", ""); - } - - //! Destructor - ~spin_rw_mutex() { - __TBB_ASSERT(!m_state, "destruction of an acquired mutex"); - } - - //! No Copy - spin_rw_mutex(const spin_rw_mutex&) = delete; - spin_rw_mutex& operator=(const spin_rw_mutex&) = delete; - - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock { - public: - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - constexpr scoped_lock() noexcept : m_mutex(nullptr), m_is_writer(false) {} - - //! Acquire lock on given mutex. - scoped_lock(spin_rw_mutex& m, bool write = true) : m_mutex(nullptr) { - acquire(m, write); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if (m_mutex) { - release(); - } - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - void acquire(spin_rw_mutex& m, bool write = true) { - m_is_writer = write; - m_mutex = &m; - if (write) { - m_mutex->lock(); - } else { - m_mutex->lock_shared(); - } - } - - //! Try acquire lock on given mutex. - bool try_acquire(spin_rw_mutex& m, bool write = true) { - m_is_writer = write; - bool result = write ? m.try_lock() : m.try_lock_shared(); - if (result) { - m_mutex = &m; - } - return result; - } - - //! Release lock. - void release() { - spin_rw_mutex* m = m_mutex; - m_mutex = nullptr; - - if (m_is_writer) { - m->unlock(); - } else { - m->unlock_shared(); - } - } - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - bool upgrade_to_writer() { - if (m_is_writer) return true; // Already a writer - m_is_writer = true; - return m_mutex->upgrade(); - } - - //! Downgrade writer to become a reader. - bool downgrade_to_reader() { - if (!m_is_writer) return true; // Already a reader - m_mutex->downgrade(); - m_is_writer = false; - return true; - } - - protected: - //! The pointer to the current mutex that is held, or nullptr if no mutex is held. - spin_rw_mutex* m_mutex; - - //! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock. - /** Not defined if not holding a lock. */ - bool m_is_writer; - }; - - //! Mutex traits - static constexpr bool is_rw_mutex = true; - static constexpr bool is_recursive_mutex = false; - static constexpr bool is_fair_mutex = false; - - //! Acquire lock - void lock() { - call_itt_notify(prepare, this); - for (atomic_backoff backoff; ; backoff.pause()) { - state_type s = m_state.load(std::memory_order_relaxed); - if (!(s & BUSY)) { // no readers, no writers - if (m_state.compare_exchange_strong(s, WRITER)) - break; // successfully stored writer flag - backoff.reset(); // we could be very close to complete op. 
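            // Otherwise the mutex is busy: the branch below raises WRITER_PENDING
            // so that newly arriving readers back off while this writer waits.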
- } else if (!(s & WRITER_PENDING)) { // no pending writers - m_state |= WRITER_PENDING; - } - } - call_itt_notify(acquired, this); - } - - //! Try acquiring lock (non-blocking) - /** Return true if lock acquired; false otherwise. */ - bool try_lock() { - // for a writer: only possible to acquire if no active readers or writers - state_type s = m_state.load(std::memory_order_relaxed); - if (!(s & BUSY)) { // no readers, no writers; mask is 1..1101 - if (m_state.compare_exchange_strong(s, WRITER)) { - call_itt_notify(acquired, this); - return true; // successfully stored writer flag - } - } - return false; - } - - //! Release lock - void unlock() { - call_itt_notify(releasing, this); - m_state &= READERS; - } - - //! Lock shared ownership mutex - void lock_shared() { - call_itt_notify(prepare, this); - for (atomic_backoff b; ; b.pause()) { - state_type s = m_state.load(std::memory_order_relaxed); - if (!(s & (WRITER | WRITER_PENDING))) { // no writer or write requests - state_type prev_state = m_state.fetch_add(ONE_READER); - if (!(prev_state & WRITER)) { - break; // successfully stored increased number of readers - } - // writer got there first, undo the increment - m_state -= ONE_READER; - } - } - call_itt_notify(acquired, this); - __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); - } - - //! Try lock shared ownership mutex - bool try_lock_shared() { - // for a reader: acquire if no active or waiting writers - state_type s = m_state.load(std::memory_order_relaxed); - if (!(s & (WRITER | WRITER_PENDING))) { // no writers - state_type prev_state = m_state.fetch_add(ONE_READER); - if (!(prev_state & WRITER)) { // got the lock - call_itt_notify(acquired, this); - return true; // successfully stored increased number of readers - } - // writer got there first, undo the increment - m_state -= ONE_READER; - } - return false; - } - - //! Unlock shared ownership mutex - void unlock_shared() { - __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); - call_itt_notify(releasing, this); - m_state -= ONE_READER; - } - -protected: - /** Internal non ISO C++ standard API **/ - //! This API is used through the scoped_lock class - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - bool upgrade() { - state_type s = m_state.load(std::memory_order_relaxed); - __TBB_ASSERT(s & READERS, "invalid state before upgrade: no readers "); - // Check and set writer-pending flag. - // Required conditions: either no pending writers, or we are the only reader - // (with multiple readers and pending writer, another upgrade could have been requested) - while ((s & READERS) == ONE_READER || !(s & WRITER_PENDING)) { - if (m_state.compare_exchange_strong(s, s | WRITER | WRITER_PENDING)) { - atomic_backoff backoff; - while ((m_state.load(std::memory_order_relaxed) & READERS) != ONE_READER) backoff.pause(); - __TBB_ASSERT((m_state & (WRITER_PENDING|WRITER)) == (WRITER_PENDING | WRITER), "invalid state when upgrading to writer"); - // Both new readers and writers are blocked at this time - m_state -= (ONE_READER + WRITER_PENDING); - return true; // successfully upgraded - } - } - // Slow reacquire - unlock_shared(); - lock(); - return false; - } - - //! 
Downgrade writer to a reader - void downgrade() { - call_itt_notify(releasing, this); - m_state += (ONE_READER - WRITER); - __TBB_ASSERT(m_state & READERS, "invalid state after downgrade: no readers"); - } - - using state_type = std::intptr_t; - static constexpr state_type WRITER = 1; - static constexpr state_type WRITER_PENDING = 2; - static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); - static constexpr state_type ONE_READER = 4; - static constexpr state_type BUSY = WRITER | READERS; - //! State of lock - /** Bit 0 = writer is holding lock - Bit 1 = request by a writer to acquire lock (hint to readers to wait) - Bit 2..N = number of readers holding lock */ - std::atomic<state_type> m_state; -}; // class spin_rw_mutex - -#if TBB_USE_PROFILING_TOOLS -inline void set_name(spin_rw_mutex& obj, const char* name) { - itt_set_sync_name(&obj, name); -} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(spin_rw_mutex& obj, const wchar_t* name) { - itt_set_sync_name(&obj, name); -} -#endif // WIN -#else -inline void set_name(spin_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ -inline void set_name(spin_rw_mutex&, const wchar_t*) {} -#endif // WIN -#endif -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::spin_rw_mutex; -} // namespace v1 -namespace profiling { - using detail::d1::set_name; -} -} // namespace tbb - -#include "detail/_rtm_rw_mutex.h" - -namespace tbb { -inline namespace v1 { -#if __TBB_TSX_INTRINSICS_PRESENT - using speculative_spin_rw_mutex = detail::d1::rtm_rw_mutex; -#else - using speculative_spin_rw_mutex = detail::d1::spin_rw_mutex; -#endif -} -} - -#endif /* __TBB_spin_rw_mutex_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_spin_rw_mutex_H +#define __TBB_spin_rw_mutex_H + +#include "detail/_namespace_injection.h" + +#include "profiling.h" + +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +#if __TBB_TSX_INTRINSICS_PRESENT +class rtm_rw_mutex; +#endif + +//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference +/** @ingroup synchronization */ +class spin_rw_mutex { +public: + //! Constructors + spin_rw_mutex() noexcept : m_state(0) { + create_itt_sync(this, "tbb::spin_rw_mutex", ""); + } + + //! Destructor + ~spin_rw_mutex() { + __TBB_ASSERT(!m_state, "destruction of an acquired mutex"); + } + + //! No Copy + spin_rw_mutex(const spin_rw_mutex&) = delete; + spin_rw_mutex& operator=(const spin_rw_mutex&) = delete; + + //! The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() noexcept : m_mutex(nullptr), m_is_writer(false) {} + + //! Acquire lock on given mutex. 
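        /** write==true requests exclusive (writer) ownership, write==false requests shared (reader) ownership. */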
+ scoped_lock(spin_rw_mutex& m, bool write = true) : m_mutex(nullptr) { + acquire(m, write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if (m_mutex) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire(spin_rw_mutex& m, bool write = true) { + m_is_writer = write; + m_mutex = &m; + if (write) { + m_mutex->lock(); + } else { + m_mutex->lock_shared(); + } + } + + //! Try acquire lock on given mutex. + bool try_acquire(spin_rw_mutex& m, bool write = true) { + m_is_writer = write; + bool result = write ? m.try_lock() : m.try_lock_shared(); + if (result) { + m_mutex = &m; + } + return result; + } + + //! Release lock. + void release() { + spin_rw_mutex* m = m_mutex; + m_mutex = nullptr; + + if (m_is_writer) { + m->unlock(); + } else { + m->unlock_shared(); + } + } + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade_to_writer() { + if (m_is_writer) return true; // Already a writer + m_is_writer = true; + return m_mutex->upgrade(); + } + + //! Downgrade writer to become a reader. + bool downgrade_to_reader() { + if (!m_is_writer) return true; // Already a reader + m_mutex->downgrade(); + m_is_writer = false; + return true; + } + + protected: + //! The pointer to the current mutex that is held, or nullptr if no mutex is held. + spin_rw_mutex* m_mutex; + + //! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock. + /** Not defined if not holding a lock. */ + bool m_is_writer; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + + //! Acquire lock + void lock() { + call_itt_notify(prepare, this); + for (atomic_backoff backoff; ; backoff.pause()) { + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & BUSY)) { // no readers, no writers + if (m_state.compare_exchange_strong(s, WRITER)) + break; // successfully stored writer flag + backoff.reset(); // we could be very close to complete op. + } else if (!(s & WRITER_PENDING)) { // no pending writers + m_state |= WRITER_PENDING; + } + } + call_itt_notify(acquired, this); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_lock() { + // for a writer: only possible to acquire if no active readers or writers + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & BUSY)) { // no readers, no writers; mask is 1..1101 + if (m_state.compare_exchange_strong(s, WRITER)) { + call_itt_notify(acquired, this); + return true; // successfully stored writer flag + } + } + return false; + } + + //! Release lock + void unlock() { + call_itt_notify(releasing, this); + m_state &= READERS; + } + + //! 
Lock shared ownership mutex + void lock_shared() { + call_itt_notify(prepare, this); + for (atomic_backoff b; ; b.pause()) { + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & (WRITER | WRITER_PENDING))) { // no writer or write requests + state_type prev_state = m_state.fetch_add(ONE_READER); + if (!(prev_state & WRITER)) { + break; // successfully stored increased number of readers + } + // writer got there first, undo the increment + m_state -= ONE_READER; + } + } + call_itt_notify(acquired, this); + __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); + } + + //! Try lock shared ownership mutex + bool try_lock_shared() { + // for a reader: acquire if no active or waiting writers + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & (WRITER | WRITER_PENDING))) { // no writers + state_type prev_state = m_state.fetch_add(ONE_READER); + if (!(prev_state & WRITER)) { // got the lock + call_itt_notify(acquired, this); + return true; // successfully stored increased number of readers + } + // writer got there first, undo the increment + m_state -= ONE_READER; + } + return false; + } + + //! Unlock shared ownership mutex + void unlock_shared() { + __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); + call_itt_notify(releasing, this); + m_state -= ONE_READER; + } + +protected: + /** Internal non ISO C++ standard API **/ + //! This API is used through the scoped_lock class + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade() { + state_type s = m_state.load(std::memory_order_relaxed); + __TBB_ASSERT(s & READERS, "invalid state before upgrade: no readers "); + // Check and set writer-pending flag. + // Required conditions: either no pending writers, or we are the only reader + // (with multiple readers and pending writer, another upgrade could have been requested) + while ((s & READERS) == ONE_READER || !(s & WRITER_PENDING)) { + if (m_state.compare_exchange_strong(s, s | WRITER | WRITER_PENDING)) { + atomic_backoff backoff; + while ((m_state.load(std::memory_order_relaxed) & READERS) != ONE_READER) backoff.pause(); + __TBB_ASSERT((m_state & (WRITER_PENDING|WRITER)) == (WRITER_PENDING | WRITER), "invalid state when upgrading to writer"); + // Both new readers and writers are blocked at this time + m_state -= (ONE_READER + WRITER_PENDING); + return true; // successfully upgraded + } + } + // Slow reacquire + unlock_shared(); + lock(); + return false; + } + + //! Downgrade writer to a reader + void downgrade() { + call_itt_notify(releasing, this); + m_state += (ONE_READER - WRITER); + __TBB_ASSERT(m_state & READERS, "invalid state after downgrade: no readers"); + } + + using state_type = std::intptr_t; + static constexpr state_type WRITER = 1; + static constexpr state_type WRITER_PENDING = 2; + static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); + static constexpr state_type ONE_READER = 4; + static constexpr state_type BUSY = WRITER | READERS; + //! 
State of lock + /** Bit 0 = writer is holding lock + Bit 1 = request by a writer to acquire lock (hint to readers to wait) + Bit 2..N = number of readers holding lock */ + std::atomic<state_type> m_state; +}; // class spin_rw_mutex + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(spin_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(spin_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_rw_mutex&, const wchar_t*) {} +#endif // WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::spin_rw_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#include "detail/_rtm_rw_mutex.h" + +namespace tbb { +inline namespace v1 { +#if __TBB_TSX_INTRINSICS_PRESENT + using speculative_spin_rw_mutex = detail::d1::rtm_rw_mutex; +#else + using speculative_spin_rw_mutex = detail::d1::spin_rw_mutex; +#endif +} +} + +#endif /* __TBB_spin_rw_mutex_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/task.h b/contrib/libs/tbb/include/oneapi/tbb/task.h index 82ce1df6cd..1d242e4cc3 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task.h @@ -1,37 +1,37 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_task_H -#define __TBB_task_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_task.h" - -namespace tbb { -inline namespace v1 { -namespace task { -#if __TBB_RESUMABLE_TASKS - using detail::d1::suspend_point; - using detail::d1::resume; - using detail::d1::suspend; -#endif /* __TBB_RESUMABLE_TASKS */ - using detail::d1::current_context; -} // namespace task -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_task_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_task_H +#define __TBB_task_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_task.h" + +namespace tbb { +inline namespace v1 { +namespace task { +#if __TBB_RESUMABLE_TASKS + using detail::d1::suspend_point; + using detail::d1::resume; + using detail::d1::suspend; +#endif /* __TBB_RESUMABLE_TASKS */ + using detail::d1::current_context; +} // namespace task +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_task_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h index f1d0f9dea3..b83c5d7866 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h @@ -1,452 +1,452 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_task_arena_H -#define __TBB_task_arena_H - -#include "detail/_namespace_injection.h" -#include "detail/_task.h" -#include "detail/_exception.h" -#include "detail/_aligned_space.h" -#include "detail/_small_object_pool.h" - -#if __TBB_ARENA_BINDING -#include "info.h" -#endif /*__TBB_ARENA_BINDING*/ - -namespace tbb { -namespace detail { - -namespace d1 { - -template<typename F, typename R> -class task_arena_function : public delegate_base { - F &my_func; - aligned_space<R> my_return_storage; - bool my_constructed{false}; - // The function should be called only once. - bool operator()() const override { - new (my_return_storage.begin()) R(my_func()); - return true; - } -public: - task_arena_function(F& f) : my_func(f) {} - // The function can be called only after operator() and only once. 
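For reference, a minimal usage sketch of the reader-writer lock declared in the spin_rw_mutex.h diff above. The shared table and the helper functions are illustrative only and are not part of this change; the include path follows the oneapi/tbb layout used in this directory.

    #include <map>
    #include <string>
    #include "oneapi/tbb/spin_rw_mutex.h"

    // Illustrative shared state guarded by the mutex from the header above.
    static std::map<std::string, int> g_table;
    static tbb::spin_rw_mutex g_table_mutex;

    int lookup(const std::string& key) {
        // write = false takes shared (reader) ownership.
        tbb::spin_rw_mutex::scoped_lock lock(g_table_mutex, /*write=*/false);
        auto it = g_table.find(key);
        return it == g_table.end() ? -1 : it->second;
    }

    void insert_if_absent(const std::string& key, int value) {
        tbb::spin_rw_mutex::scoped_lock lock(g_table_mutex, /*write=*/false);
        if (g_table.count(key) == 0) {
            // upgrade_to_writer() returns false when the lock had to be
            // released and re-acquired, so the check must be repeated.
            if (!lock.upgrade_to_writer() && g_table.count(key) != 0) {
                return;
            }
            g_table.emplace(key, value);
        }
    }

The repeated check after a failed upgrade mirrors the upgrade() slow path shown in the header: when another writer gets in first, the reader lock is dropped and re-taken, so any state observed before the upgrade may be stale.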
- R consume_result() { - my_constructed = true; - return std::move(*(my_return_storage.begin())); - } - ~task_arena_function() override { - if (my_constructed) { - my_return_storage.begin()->~R(); - } - } -}; - -template<typename F> -class task_arena_function<F,void> : public delegate_base { - F &my_func; - bool operator()() const override { - my_func(); - return true; - } -public: - task_arena_function(F& f) : my_func(f) {} - void consume_result() const {} - - friend class task_arena_base; -}; - -class task_arena_base; -class task_scheduler_observer; -} // namespace d1 - -namespace r1 { -class arena; -struct task_arena_impl; - -void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool); -void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base&); -void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base&); -bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base&); -void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); -void __TBB_EXPORTED_FUNC wait(d1::task_arena_base&); -int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base*); -void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base& d, std::intptr_t); - -void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_arena_base*); -void __TBB_EXPORTED_FUNC submit(d1::task&, d1::task_group_context&, arena*, std::uintptr_t); -} // namespace r1 - -namespace d1 { - -static constexpr int priority_stride = INT_MAX / 4; - -class task_arena_base { - friend struct r1::task_arena_impl; - friend void r1::observe(d1::task_scheduler_observer&, bool); -public: - enum class priority : int { - low = 1 * priority_stride, - normal = 2 * priority_stride, - high = 3 * priority_stride - }; -#if __TBB_ARENA_BINDING - using constraints = tbb::detail::d1::constraints; -#endif /*__TBB_ARENA_BINDING*/ -protected: - //! Special settings - intptr_t my_version_and_traits; - - std::atomic<do_once_state> my_initialization_state; - - //! NULL if not currently initialized. - std::atomic<r1::arena*> my_arena; - static_assert(sizeof(std::atomic<r1::arena*>) == sizeof(r1::arena*), - "To preserve backward compatibility we need the equal size of an atomic pointer and a pointer"); - - //! Concurrency level for deferred initialization - int my_max_concurrency; - - //! Reserved slots for external threads - unsigned my_num_reserved_slots; - - //! Arena priority - priority my_priority; - - //! The NUMA node index to which the arena will be attached - numa_node_id my_numa_id; - - //! The core type index to which arena will be attached - core_type_id my_core_type; - - //! Number of threads per core - int my_max_threads_per_core; - - // Backward compatibility checks. - core_type_id core_type() const { - return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? my_core_type : automatic; - } - int max_threads_per_core() const { - return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? 
my_max_threads_per_core : automatic; - } - - enum { - default_flags = 0 - , core_type_support_flag = 1 - }; - - task_arena_base(int max_concurrency, unsigned reserved_for_masters, priority a_priority) - : my_version_and_traits(default_flags | core_type_support_flag) - , my_initialization_state(do_once_state::uninitialized) - , my_arena(nullptr) - , my_max_concurrency(max_concurrency) - , my_num_reserved_slots(reserved_for_masters) - , my_priority(a_priority) - , my_numa_id(automatic) - , my_core_type(automatic) - , my_max_threads_per_core(automatic) - {} - -#if __TBB_ARENA_BINDING - task_arena_base(const constraints& constraints_, unsigned reserved_for_masters, priority a_priority) - : my_version_and_traits(default_flags | core_type_support_flag) - , my_initialization_state(do_once_state::uninitialized) - , my_arena(nullptr) - , my_max_concurrency(constraints_.max_concurrency) - , my_num_reserved_slots(reserved_for_masters) - , my_priority(a_priority) - , my_numa_id(constraints_.numa_id) -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - , my_core_type(constraints_.core_type) - , my_max_threads_per_core(constraints_.max_threads_per_core) -#else - , my_core_type(automatic) - , my_max_threads_per_core(automatic) -#endif - {} -#endif /*__TBB_ARENA_BINDING*/ -public: - //! Typedef for number of threads that is automatic. - static const int automatic = -1; - static const int not_initialized = -2; -}; - -template<typename R, typename F> -R isolate_impl(F& f) { - task_arena_function<F, R> func(f); - r1::isolate_within_arena(func, /*isolation*/ 0); - return func.consume_result(); -} - -/** 1-to-1 proxy representation class of scheduler's arena - * Constructors set up settings only, real construction is deferred till the first method invocation - * Destructor only removes one of the references to the inner arena representation. - * Final destruction happens when all the references (and the work) are gone. - */ -class task_arena : public task_arena_base { - - template <typename F> - class enqueue_task : public task { - small_object_allocator m_allocator; - const F m_func; - - void finalize(const execution_data& ed) { - m_allocator.delete_object(this, ed); - } - task* execute(execution_data& ed) override { - m_func(); - finalize(ed); - return nullptr; - } - task* cancel(execution_data&) override { - __TBB_ASSERT_RELEASE(false, "Unhandled exception from enqueue task is caught"); - return nullptr; - } - public: - enqueue_task(const F& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(f) {} - enqueue_task(F&& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(std::move(f)) {} - }; - - void mark_initialized() { - __TBB_ASSERT( my_arena.load(std::memory_order_relaxed), "task_arena initialization is incomplete" ); - my_initialization_state.store(do_once_state::initialized, std::memory_order_release); - } - - template<typename F> - void enqueue_impl(F&& f) { - initialize(); - small_object_allocator alloc{}; - r1::enqueue(*alloc.new_object<enqueue_task<typename std::decay<F>::type>>(std::forward<F>(f), alloc), this); - } - - template<typename R, typename F> - R execute_impl(F& f) { - initialize(); - task_arena_function<F, R> func(f); - r1::execute(*this, func); - return func.consume_result(); - } -public: - //! 
Creates task_arena with certain concurrency limits - /** Sets up settings only, real construction is deferred till the first method invocation - * @arg max_concurrency specifies total number of slots in arena where threads work - * @arg reserved_for_masters specifies number of slots to be used by external threads only. - * Value of 1 is default and reflects behavior of implicit arenas. - **/ - task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1, - priority a_priority = priority::normal) - : task_arena_base(max_concurrency_, reserved_for_masters, a_priority) - {} - -#if __TBB_ARENA_BINDING - //! Creates task arena pinned to certain NUMA node - task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1, - priority a_priority = priority::normal) - : task_arena_base(constraints_, reserved_for_masters, a_priority) - {} - - //! Copies settings from another task_arena - task_arena(const task_arena &s) // copy settings but not the reference or instance - : task_arena_base( - constraints{} - .set_numa_id(s.my_numa_id) - .set_max_concurrency(s.my_max_concurrency) -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - .set_core_type(s.my_core_type) - .set_max_threads_per_core(s.my_max_threads_per_core) -#endif - , s.my_num_reserved_slots, s.my_priority) - {} -#else - //! Copies settings from another task_arena - task_arena(const task_arena& a) // copy settings but not the reference or instance - : task_arena_base(a.my_max_concurrency, a.my_num_reserved_slots, a.my_priority) - {} -#endif /*__TBB_ARENA_BINDING*/ - - //! Tag class used to indicate the "attaching" constructor - struct attach {}; - - //! Creates an instance of task_arena attached to the current arena of the thread - explicit task_arena( attach ) - : task_arena_base(automatic, 1, priority::normal) // use default settings if attach fails - { - if (r1::attach(*this)) { - mark_initialized(); - } - } - - //! Forces allocation of the resources for the task_arena as specified in constructor arguments - void initialize() { - atomic_do_once([this]{ r1::initialize(*this); }, my_initialization_state); - } - - //! Overrides concurrency level and forces initialization of internal representation - void initialize(int max_concurrency_, unsigned reserved_for_masters = 1, - priority a_priority = priority::normal) - { - __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); - if( !is_active() ) { - my_max_concurrency = max_concurrency_; - my_num_reserved_slots = reserved_for_masters; - my_priority = a_priority; - r1::initialize(*this); - mark_initialized(); - } - } - -#if __TBB_ARENA_BINDING - void initialize(constraints constraints_, unsigned reserved_for_masters = 1, - priority a_priority = priority::normal) - { - __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); - if( !is_active() ) { - my_numa_id = constraints_.numa_id; - my_max_concurrency = constraints_.max_concurrency; -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - my_core_type = constraints_.core_type; - my_max_threads_per_core = constraints_.max_threads_per_core; -#endif - my_num_reserved_slots = reserved_for_masters; - my_priority = a_priority; - r1::initialize(*this); - mark_initialized(); - } - } -#endif /*__TBB_ARENA_BINDING*/ - - //! 
Attaches this instance to the current arena of the thread - void initialize(attach) { - // TODO: decide if this call must be thread-safe - __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); - if( !is_active() ) { - if ( !r1::attach(*this) ) { - r1::initialize(*this); - } - mark_initialized(); - } - } - - //! Removes the reference to the internal arena representation. - //! Not thread safe wrt concurrent invocations of other methods. - void terminate() { - if( is_active() ) { - r1::terminate(*this); - my_initialization_state.store(do_once_state::uninitialized, std::memory_order_relaxed); - } - } - - //! Removes the reference to the internal arena representation, and destroys the external object. - //! Not thread safe wrt concurrent invocations of other methods. - ~task_arena() { - terminate(); - } - - //! Returns true if the arena is active (initialized); false otherwise. - //! The name was chosen to match a task_scheduler_init method with the same semantics. - bool is_active() const { - return my_initialization_state.load(std::memory_order_acquire) == do_once_state::initialized; - } - - //! Enqueues a task into the arena to process a functor, and immediately returns. - //! Does not require the calling thread to join the arena - - template<typename F> - void enqueue(F&& f) { - enqueue_impl(std::forward<F>(f)); - } - - //! Joins the arena and executes a mutable functor, then returns - //! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion - //! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread - //! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). - template<typename F> - auto execute(F&& f) -> decltype(f()) { - return execute_impl<decltype(f())>(f); - } - -#if __TBB_EXTRA_DEBUG - //! Returns my_num_reserved_slots - int debug_reserved_slots() const { - // Handle special cases inside the library - return my_num_reserved_slots; - } - - //! Returns my_max_concurrency - int debug_max_concurrency() const { - // Handle special cases inside the library - return my_max_concurrency; - } - - //! Wait for all work in the arena to be completed - //! Even submitted by other application threads - //! Joins arena if/when possible (in the same way as execute()) - void debug_wait_until_empty() { - initialize(); - r1::wait(*this); - } -#endif //__TBB_EXTRA_DEBUG - - //! Returns the maximal number of threads that can work inside the arena - int max_concurrency() const { - // Handle special cases inside the library - return (my_max_concurrency > 1) ? my_max_concurrency : r1::max_concurrency(this); - } - - friend void submit(task& t, task_arena& ta, task_group_context& ctx, bool as_critical) { - __TBB_ASSERT(ta.is_active(), nullptr); - call_itt_task_notify(releasing, &t); - r1::submit(t, ctx, ta.my_arena.load(std::memory_order_relaxed), as_critical ? 1 : 0); - } -}; - -//! Executes a mutable functor in isolation within the current task arena. -//! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). -template<typename F> -inline auto isolate(F&& f) -> decltype(f()) { - return isolate_impl<decltype(f())>(f); -} - -//! Returns the index, aka slot number, of the calling thread in its current arena -inline int current_thread_index() { - int idx = r1::execution_slot(nullptr); - return idx == -1 ? task_arena_base::not_initialized : idx; -} - -//! 
Returns the maximal number of threads that can work inside the arena -inline int max_concurrency() { - return r1::max_concurrency(nullptr); -} - -using r1::submit; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::task_arena; - -namespace this_task_arena { -using detail::d1::current_thread_index; -using detail::d1::max_concurrency; -using detail::d1::isolate; -} // namespace this_task_arena - -} // inline namespace v1 - -} // namespace tbb -#endif /* __TBB_task_arena_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_task_arena_H +#define __TBB_task_arena_H + +#include "detail/_namespace_injection.h" +#include "detail/_task.h" +#include "detail/_exception.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#if __TBB_ARENA_BINDING +#include "info.h" +#endif /*__TBB_ARENA_BINDING*/ + +namespace tbb { +namespace detail { + +namespace d1 { + +template<typename F, typename R> +class task_arena_function : public delegate_base { + F &my_func; + aligned_space<R> my_return_storage; + bool my_constructed{false}; + // The function should be called only once. + bool operator()() const override { + new (my_return_storage.begin()) R(my_func()); + return true; + } +public: + task_arena_function(F& f) : my_func(f) {} + // The function can be called only after operator() and only once. 
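A sketch of how the task_arena interface shown in this header is commonly used; the concurrency limit and the vector payload are illustrative assumptions, not part of the diff.

    #include <cstddef>
    #include <vector>
    #include "oneapi/tbb/task_arena.h"
    #include "oneapi/tbb/parallel_for.h"

    void scale(std::vector<double>& v) {
        // At most 4 slots, one of them reserved for the calling (external) thread.
        tbb::task_arena arena(/*max_concurrency=*/4, /*reserved_for_masters=*/1);

        // execute() joins the arena and returns when the functor completes.
        arena.execute([&] {
            tbb::parallel_for(std::size_t(0), v.size(), [&](std::size_t i) {
                v[i] *= 2.0;
            });
        });

        // enqueue() submits fire-and-forget work and returns immediately;
        // completion has to be tracked separately (e.g. via a task_group).
        arena.enqueue([] { /* background work */ });
    }

Real arena construction is deferred, as the comments in the header note: the constructor only records settings, and the first execute(), enqueue(), or initialize() call allocates the underlying arena.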
+ R consume_result() { + my_constructed = true; + return std::move(*(my_return_storage.begin())); + } + ~task_arena_function() override { + if (my_constructed) { + my_return_storage.begin()->~R(); + } + } +}; + +template<typename F> +class task_arena_function<F,void> : public delegate_base { + F &my_func; + bool operator()() const override { + my_func(); + return true; + } +public: + task_arena_function(F& f) : my_func(f) {} + void consume_result() const {} + + friend class task_arena_base; +}; + +class task_arena_base; +class task_scheduler_observer; +} // namespace d1 + +namespace r1 { +class arena; +struct task_arena_impl; + +void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool); +void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base&); +void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base&); +bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base&); +void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); +void __TBB_EXPORTED_FUNC wait(d1::task_arena_base&); +int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base*); +void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base& d, std::intptr_t); + +void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_arena_base*); +void __TBB_EXPORTED_FUNC submit(d1::task&, d1::task_group_context&, arena*, std::uintptr_t); +} // namespace r1 + +namespace d1 { + +static constexpr int priority_stride = INT_MAX / 4; + +class task_arena_base { + friend struct r1::task_arena_impl; + friend void r1::observe(d1::task_scheduler_observer&, bool); +public: + enum class priority : int { + low = 1 * priority_stride, + normal = 2 * priority_stride, + high = 3 * priority_stride + }; +#if __TBB_ARENA_BINDING + using constraints = tbb::detail::d1::constraints; +#endif /*__TBB_ARENA_BINDING*/ +protected: + //! Special settings + intptr_t my_version_and_traits; + + std::atomic<do_once_state> my_initialization_state; + + //! NULL if not currently initialized. + std::atomic<r1::arena*> my_arena; + static_assert(sizeof(std::atomic<r1::arena*>) == sizeof(r1::arena*), + "To preserve backward compatibility we need the equal size of an atomic pointer and a pointer"); + + //! Concurrency level for deferred initialization + int my_max_concurrency; + + //! Reserved slots for external threads + unsigned my_num_reserved_slots; + + //! Arena priority + priority my_priority; + + //! The NUMA node index to which the arena will be attached + numa_node_id my_numa_id; + + //! The core type index to which arena will be attached + core_type_id my_core_type; + + //! Number of threads per core + int my_max_threads_per_core; + + // Backward compatibility checks. + core_type_id core_type() const { + return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? my_core_type : automatic; + } + int max_threads_per_core() const { + return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? 
my_max_threads_per_core : automatic; + } + + enum { + default_flags = 0 + , core_type_support_flag = 1 + }; + + task_arena_base(int max_concurrency, unsigned reserved_for_masters, priority a_priority) + : my_version_and_traits(default_flags | core_type_support_flag) + , my_initialization_state(do_once_state::uninitialized) + , my_arena(nullptr) + , my_max_concurrency(max_concurrency) + , my_num_reserved_slots(reserved_for_masters) + , my_priority(a_priority) + , my_numa_id(automatic) + , my_core_type(automatic) + , my_max_threads_per_core(automatic) + {} + +#if __TBB_ARENA_BINDING + task_arena_base(const constraints& constraints_, unsigned reserved_for_masters, priority a_priority) + : my_version_and_traits(default_flags | core_type_support_flag) + , my_initialization_state(do_once_state::uninitialized) + , my_arena(nullptr) + , my_max_concurrency(constraints_.max_concurrency) + , my_num_reserved_slots(reserved_for_masters) + , my_priority(a_priority) + , my_numa_id(constraints_.numa_id) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + , my_core_type(constraints_.core_type) + , my_max_threads_per_core(constraints_.max_threads_per_core) +#else + , my_core_type(automatic) + , my_max_threads_per_core(automatic) +#endif + {} +#endif /*__TBB_ARENA_BINDING*/ +public: + //! Typedef for number of threads that is automatic. + static const int automatic = -1; + static const int not_initialized = -2; +}; + +template<typename R, typename F> +R isolate_impl(F& f) { + task_arena_function<F, R> func(f); + r1::isolate_within_arena(func, /*isolation*/ 0); + return func.consume_result(); +} + +/** 1-to-1 proxy representation class of scheduler's arena + * Constructors set up settings only, real construction is deferred till the first method invocation + * Destructor only removes one of the references to the inner arena representation. + * Final destruction happens when all the references (and the work) are gone. + */ +class task_arena : public task_arena_base { + + template <typename F> + class enqueue_task : public task { + small_object_allocator m_allocator; + const F m_func; + + void finalize(const execution_data& ed) { + m_allocator.delete_object(this, ed); + } + task* execute(execution_data& ed) override { + m_func(); + finalize(ed); + return nullptr; + } + task* cancel(execution_data&) override { + __TBB_ASSERT_RELEASE(false, "Unhandled exception from enqueue task is caught"); + return nullptr; + } + public: + enqueue_task(const F& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(f) {} + enqueue_task(F&& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(std::move(f)) {} + }; + + void mark_initialized() { + __TBB_ASSERT( my_arena.load(std::memory_order_relaxed), "task_arena initialization is incomplete" ); + my_initialization_state.store(do_once_state::initialized, std::memory_order_release); + } + + template<typename F> + void enqueue_impl(F&& f) { + initialize(); + small_object_allocator alloc{}; + r1::enqueue(*alloc.new_object<enqueue_task<typename std::decay<F>::type>>(std::forward<F>(f), alloc), this); + } + + template<typename R, typename F> + R execute_impl(F& f) { + initialize(); + task_arena_function<F, R> func(f); + r1::execute(*this, func); + return func.consume_result(); + } +public: + //! 
Creates task_arena with certain concurrency limits + /** Sets up settings only, real construction is deferred till the first method invocation + * @arg max_concurrency specifies total number of slots in arena where threads work + * @arg reserved_for_masters specifies number of slots to be used by external threads only. + * Value of 1 is default and reflects behavior of implicit arenas. + **/ + task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + : task_arena_base(max_concurrency_, reserved_for_masters, a_priority) + {} + +#if __TBB_ARENA_BINDING + //! Creates task arena pinned to certain NUMA node + task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + : task_arena_base(constraints_, reserved_for_masters, a_priority) + {} + + //! Copies settings from another task_arena + task_arena(const task_arena &s) // copy settings but not the reference or instance + : task_arena_base( + constraints{} + .set_numa_id(s.my_numa_id) + .set_max_concurrency(s.my_max_concurrency) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + .set_core_type(s.my_core_type) + .set_max_threads_per_core(s.my_max_threads_per_core) +#endif + , s.my_num_reserved_slots, s.my_priority) + {} +#else + //! Copies settings from another task_arena + task_arena(const task_arena& a) // copy settings but not the reference or instance + : task_arena_base(a.my_max_concurrency, a.my_num_reserved_slots, a.my_priority) + {} +#endif /*__TBB_ARENA_BINDING*/ + + //! Tag class used to indicate the "attaching" constructor + struct attach {}; + + //! Creates an instance of task_arena attached to the current arena of the thread + explicit task_arena( attach ) + : task_arena_base(automatic, 1, priority::normal) // use default settings if attach fails + { + if (r1::attach(*this)) { + mark_initialized(); + } + } + + //! Forces allocation of the resources for the task_arena as specified in constructor arguments + void initialize() { + atomic_do_once([this]{ r1::initialize(*this); }, my_initialization_state); + } + + //! Overrides concurrency level and forces initialization of internal representation + void initialize(int max_concurrency_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + { + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + my_max_concurrency = max_concurrency_; + my_num_reserved_slots = reserved_for_masters; + my_priority = a_priority; + r1::initialize(*this); + mark_initialized(); + } + } + +#if __TBB_ARENA_BINDING + void initialize(constraints constraints_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + { + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + my_numa_id = constraints_.numa_id; + my_max_concurrency = constraints_.max_concurrency; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + my_core_type = constraints_.core_type; + my_max_threads_per_core = constraints_.max_threads_per_core; +#endif + my_num_reserved_slots = reserved_for_masters; + my_priority = a_priority; + r1::initialize(*this); + mark_initialized(); + } + } +#endif /*__TBB_ARENA_BINDING*/ + + //! 
Attaches this instance to the current arena of the thread + void initialize(attach) { + // TODO: decide if this call must be thread-safe + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + if ( !r1::attach(*this) ) { + r1::initialize(*this); + } + mark_initialized(); + } + } + + //! Removes the reference to the internal arena representation. + //! Not thread safe wrt concurrent invocations of other methods. + void terminate() { + if( is_active() ) { + r1::terminate(*this); + my_initialization_state.store(do_once_state::uninitialized, std::memory_order_relaxed); + } + } + + //! Removes the reference to the internal arena representation, and destroys the external object. + //! Not thread safe wrt concurrent invocations of other methods. + ~task_arena() { + terminate(); + } + + //! Returns true if the arena is active (initialized); false otherwise. + //! The name was chosen to match a task_scheduler_init method with the same semantics. + bool is_active() const { + return my_initialization_state.load(std::memory_order_acquire) == do_once_state::initialized; + } + + //! Enqueues a task into the arena to process a functor, and immediately returns. + //! Does not require the calling thread to join the arena + + template<typename F> + void enqueue(F&& f) { + enqueue_impl(std::forward<F>(f)); + } + + //! Joins the arena and executes a mutable functor, then returns + //! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion + //! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread + //! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). + template<typename F> + auto execute(F&& f) -> decltype(f()) { + return execute_impl<decltype(f())>(f); + } + +#if __TBB_EXTRA_DEBUG + //! Returns my_num_reserved_slots + int debug_reserved_slots() const { + // Handle special cases inside the library + return my_num_reserved_slots; + } + + //! Returns my_max_concurrency + int debug_max_concurrency() const { + // Handle special cases inside the library + return my_max_concurrency; + } + + //! Wait for all work in the arena to be completed + //! Even submitted by other application threads + //! Joins arena if/when possible (in the same way as execute()) + void debug_wait_until_empty() { + initialize(); + r1::wait(*this); + } +#endif //__TBB_EXTRA_DEBUG + + //! Returns the maximal number of threads that can work inside the arena + int max_concurrency() const { + // Handle special cases inside the library + return (my_max_concurrency > 1) ? my_max_concurrency : r1::max_concurrency(this); + } + + friend void submit(task& t, task_arena& ta, task_group_context& ctx, bool as_critical) { + __TBB_ASSERT(ta.is_active(), nullptr); + call_itt_task_notify(releasing, &t); + r1::submit(t, ctx, ta.my_arena.load(std::memory_order_relaxed), as_critical ? 1 : 0); + } +}; + +//! Executes a mutable functor in isolation within the current task arena. +//! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). +template<typename F> +inline auto isolate(F&& f) -> decltype(f()) { + return isolate_impl<decltype(f())>(f); +} + +//! Returns the index, aka slot number, of the calling thread in its current arena +inline int current_thread_index() { + int idx = r1::execution_slot(nullptr); + return idx == -1 ? task_arena_base::not_initialized : idx; +} + +//! 
Returns the maximal number of threads that can work inside the arena +inline int max_concurrency() { + return r1::max_concurrency(nullptr); +} + +using r1::submit; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::task_arena; + +namespace this_task_arena { +using detail::d1::current_thread_index; +using detail::d1::max_concurrency; +using detail::d1::isolate; +} // namespace this_task_arena + +} // inline namespace v1 + +} // namespace tbb +#endif /* __TBB_task_arena_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_group.h b/contrib/libs/tbb/include/oneapi/tbb/task_group.h index e82553076a..0aa7c46a8c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_group.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_group.h @@ -1,556 +1,556 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_task_group_H -#define __TBB_task_group_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" -#include "detail/_template_helpers.h" -#include "detail/_utils.h" -#include "detail/_exception.h" -#include "detail/_task.h" -#include "detail/_small_object_pool.h" - -#include "profiling.h" - -#include <functional> - -#if _MSC_VER && !defined(__INTEL_COMPILER) - // Suppress warning: structure was padded due to alignment specifier - #pragma warning(push) - #pragma warning(disable:4324) -#endif - -namespace tbb { -namespace detail { - -namespace d1 { -class delegate_base; -class task_arena_base; -class task_group_context; -} - -namespace r1 { -// Forward declarations -class tbb_exception_ptr; -class market; -class thread_data; -class task_dispatcher; -template <bool> -class context_guard_helper; -struct task_arena_impl; - -void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); -void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base&, std::intptr_t); - -void __TBB_EXPORTED_FUNC initialize(d1::task_group_context&); -void __TBB_EXPORTED_FUNC destroy(d1::task_group_context&); -void __TBB_EXPORTED_FUNC reset(d1::task_group_context&); -bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context&); -bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context&); -void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context&); - -struct task_group_context_impl; -} - -namespace d1 { - -struct context_list_node { - std::atomic<context_list_node*> prev{}; - std::atomic<context_list_node*> next{}; - - void remove_relaxed() { - context_list_node* p = prev.load(std::memory_order_relaxed); - context_list_node* n = next.load(std::memory_order_relaxed); - p->next.store(n, std::memory_order_relaxed); - n->prev.store(p, std::memory_order_relaxed); - } -}; - -//! Used to form groups of tasks -/** @ingroup task_scheduling - The context services explicit cancellation requests from user code, and unhandled - exceptions intercepted during tasks execution. 
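The free functions re-exported through tbb::this_task_arena at the end of the task_arena.h diff can be exercised roughly as follows; the nested loop is only a placeholder workload.

    #include <cstdio>
    #include "oneapi/tbb/task_arena.h"
    #include "oneapi/tbb/parallel_for.h"

    void report_and_isolate() {
        // Slot index of the calling thread in its current arena
        // (task_arena::not_initialized when called outside any arena).
        std::printf("slot %d of %d\n",
                    tbb::this_task_arena::current_thread_index(),
                    tbb::this_task_arena::max_concurrency());

        // isolate() prevents the calling thread from picking up unrelated
        // tasks while it blocks in the nested parallel loop.
        tbb::this_task_arena::isolate([] {
            tbb::parallel_for(0, 100, [](int) { /* nested work */ });
        });
    }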
Intercepting an exception results - in generating internal cancellation requests (which is processed in exactly the - same way as external ones). - - The context is associated with one or more root tasks and defines the cancellation - group that includes all the descendants of the corresponding root task(s). Association - is established when a context object is passed as an argument to the task::allocate_root() - method. See task_group_context::task_group_context for more details. - - The context can be bound to another one, and other contexts can be bound to it, - forming a tree-like structure: parent -> this -> children. Arrows here designate - cancellation propagation direction. If a task in a cancellation group is cancelled - all the other tasks in this group and groups bound to it (as children) get cancelled too. -**/ -class task_group_context : no_copy { -public: - enum traits_type { - fp_settings = 1 << 1, - concurrent_wait = 1 << 2, - default_traits = 0 - }; - enum kind_type { - isolated, - bound - }; -private: - //! Space for platform-specific FPU settings. - /** Must only be accessed inside TBB binaries, and never directly in user - code or inline methods. */ - std::uint64_t my_cpu_ctl_env; - - //! Specifies whether cancellation was requested for this task group. - std::atomic<std::uint32_t> my_cancellation_requested; - - //! Version for run-time checks and behavioral traits of the context. - std::uint8_t my_version; - - //! The context traits. - struct context_traits { - bool fp_settings : 1; - bool concurrent_wait : 1; - bool bound : 1; - } my_traits; - - static_assert(sizeof(context_traits) == 1, "Traits shall fit into one byte."); - - static constexpr std::uint8_t may_have_children = 1; - //! The context internal state (currently only may_have_children). - std::atomic<std::uint8_t> my_state; - - enum class lifetime_state : std::uint8_t { - created, - locked, - isolated, - bound, - detached, - dying - }; - - //! The synchronization machine state to manage lifetime. - std::atomic<lifetime_state> my_lifetime_state; - - //! Pointer to the context of the parent cancellation group. NULL for isolated contexts. - task_group_context* my_parent; - - //! Thread data instance that registered this context in its list. - std::atomic<r1::thread_data*> my_owner; - - //! Used to form the thread specific list of contexts without additional memory allocation. - /** A context is included into the list of the current thread when its binding to - its parent happens. Any context can be present in the list of one thread only. **/ - context_list_node my_node; - - //! Pointer to the container storing exception being propagated across this task group. - r1::tbb_exception_ptr* my_exception; - - //! Used to set and maintain stack stitching point for Intel Performance Tools. - void* my_itt_caller; - - //! Description of algorithm for scheduler based instrumentation. 
- string_resource_index my_name; - - char padding[max_nfs_size - - sizeof(std::uint64_t) // my_cpu_ctl_env - - sizeof(std::atomic<std::uint32_t>) // my_cancellation_requested - - sizeof(std::uint8_t) // my_version - - sizeof(context_traits) // my_traits - - sizeof(std::atomic<std::uint8_t>) // my_state - - sizeof(std::atomic<lifetime_state>) // my_lifetime_state - - sizeof(task_group_context*) // my_parent - - sizeof(std::atomic<r1::thread_data*>) // my_owner - - sizeof(context_list_node) // my_node - - sizeof(r1::tbb_exception_ptr*) // my_exception - - sizeof(void*) // my_itt_caller - - sizeof(string_resource_index) // my_name - ]; - - task_group_context(context_traits t, string_resource_index name) - : my_version{}, my_name{ name } { - my_traits = t; // GCC4.8 issues warning list initialization for bitset (missing-field-initializers) - r1::initialize(*this); - } - - static context_traits make_traits(kind_type relation_with_parent, std::uintptr_t user_traits) { - context_traits ct; - ct.bound = relation_with_parent == bound; - ct.fp_settings = (user_traits & fp_settings) == fp_settings; - ct.concurrent_wait = (user_traits & concurrent_wait) == concurrent_wait; - return ct; - } - -public: - //! Default & binding constructor. - /** By default a bound context is created. That is this context will be bound - (as child) to the context of the currently executing task . Cancellation - requests passed to the parent context are propagated to all the contexts - bound to it. Similarly priority change is propagated from the parent context - to its children. - - If task_group_context::isolated is used as the argument, then the tasks associated - with this context will never be affected by events in any other context. - - Creating isolated contexts involve much less overhead, but they have limited - utility. Normally when an exception occurs in an algorithm that has nested - ones running, it is desirably to have all the nested algorithms cancelled - as well. Such a behavior requires nested algorithms to use bound contexts. - - There is one good place where using isolated algorithms is beneficial. It is - an external thread. That is if a particular algorithm is invoked directly from - the external thread (not from a TBB task), supplying it with explicitly - created isolated context will result in a faster algorithm startup. - - VERSIONING NOTE: - Implementation(s) of task_group_context constructor(s) cannot be made - entirely out-of-line because the run-time version must be set by the user - code. This will become critically important for binary compatibility, if - we ever have to change the size of the context object. **/ - - task_group_context(kind_type relation_with_parent = bound, - std::uintptr_t t = default_traits) - : task_group_context(make_traits(relation_with_parent, t), CUSTOM_CTX) {} - - // Custom constructor for instrumentation of oneTBB algorithm - task_group_context (string_resource_index name ) - : task_group_context(make_traits(bound, default_traits), name) {} - - // Do not introduce any logic on user side since it might break state propagation assumptions - ~task_group_context() { - r1::destroy(*this); - } - - //! Forcefully reinitializes the context after the task tree it was associated with is completed. - /** Because the method assumes that all the tasks that used to be associated with - this context have already finished, calling it while the context is still - in use somewhere in the task hierarchy leads to undefined behavior. - - IMPORTANT: This method is not thread safe! 
- - The method does not change the context's parent if it is set. **/ - void reset() { - r1::reset(*this); - } - - //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups. - /** \return false if cancellation has already been requested, true otherwise. - - Note that canceling never fails. When false is returned, it just means that - another thread (or this one) has already sent cancellation request to this - context or to one of its ancestors (if this context is bound). It is guaranteed - that when this method is concurrently called on the same not yet cancelled - context, true will be returned by one and only one invocation. **/ - bool cancel_group_execution() { - return r1::cancel_group_execution(*this); - } - - //! Returns true if the context received cancellation request. - bool is_group_execution_cancelled() { - return r1::is_group_execution_cancelled(*this); - } - -#if __TBB_FP_CONTEXT - //! Captures the current FPU control settings to the context. - /** Because the method assumes that all the tasks that used to be associated with - this context have already finished, calling it while the context is still - in use somewhere in the task hierarchy leads to undefined behavior. - - IMPORTANT: This method is not thread safe! - - The method does not change the FPU control settings of the context's parent. **/ - void capture_fp_settings() { - r1::capture_fp_settings(*this); - } -#endif - - //! Returns the user visible context trait - std::uintptr_t traits() const { - std::uintptr_t t{}; - t |= my_traits.fp_settings ? fp_settings : 0; - t |= my_traits.concurrent_wait ? concurrent_wait : 0; - return t; - } -private: - //// TODO: cleanup friends - friend class r1::market; - friend class r1::thread_data; - friend class r1::task_dispatcher; - template <bool> - friend class r1::context_guard_helper; - friend struct r1::task_arena_impl; - friend struct r1::task_group_context_impl; -}; // class task_group_context - -static_assert(sizeof(task_group_context) == 128, "Wrong size of task_group_context"); - -enum task_group_status { - not_complete, - complete, - canceled -}; - -class task_group; -class structured_task_group; -#if TBB_PREVIEW_ISOLATED_TASK_GROUP -class isolated_task_group; -#endif - -template<typename F> -class function_task : public task { - const F m_func; - wait_context& m_wait_ctx; - small_object_allocator m_allocator; - - void finalize(const execution_data& ed) { - // Make a local reference not to access this after destruction. - wait_context& wo = m_wait_ctx; - // Copy allocator to the stack - auto allocator = m_allocator; - // Destroy user functor before release wait. 
- this->~function_task(); - wo.release(); - - allocator.deallocate(this, ed); - } - task* execute(execution_data& ed) override { - m_func(); - finalize(ed); - return nullptr; - } - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } -public: - function_task(const F& f, wait_context& wo, small_object_allocator& alloc) - : m_func(f) - , m_wait_ctx(wo) - , m_allocator(alloc) {} - - function_task(F&& f, wait_context& wo, small_object_allocator& alloc) - : m_func(std::move(f)) - , m_wait_ctx(wo) - , m_allocator(alloc) {} -}; - -template <typename F> -class function_stack_task : public task { - const F& m_func; - wait_context& m_wait_ctx; - - void finalize() { - m_wait_ctx.release(); - } - task* execute(execution_data&) override { - m_func(); - finalize(); - return nullptr; - } - task* cancel(execution_data&) override { - finalize(); - return nullptr; - } -public: - function_stack_task(const F& f, wait_context& wo) : m_func(f), m_wait_ctx(wo) {} -}; - -class task_group_base : no_copy { -protected: - wait_context m_wait_ctx; - task_group_context m_context; - - template<typename F> - task_group_status internal_run_and_wait(const F& f) { - function_stack_task<F> t{ f, m_wait_ctx }; - m_wait_ctx.reserve(); - bool cancellation_status = false; - try_call([&] { - execute_and_wait(t, m_context, m_wait_ctx, m_context); - }).on_completion([&] { - // TODO: the reset method is not thread-safe. Ensure the correct behavior. - cancellation_status = m_context.is_group_execution_cancelled(); - m_context.reset(); - }); - return cancellation_status ? canceled : complete; - } - - template<typename F> - task* prepare_task(F&& f) { - m_wait_ctx.reserve(); - small_object_allocator alloc{}; - return alloc.new_object<function_task<typename std::decay<F>::type>>(std::forward<F>(f), m_wait_ctx, alloc); - } - -public: - task_group_base(uintptr_t traits = 0) - : m_wait_ctx(0) - , m_context(task_group_context::bound, task_group_context::default_traits | traits) - { - } - - ~task_group_base() noexcept(false) { - if (m_wait_ctx.continue_execution()) { -#if __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT - bool stack_unwinding_in_progress = std::uncaught_exceptions() > 0; -#else - bool stack_unwinding_in_progress = std::uncaught_exception(); -#endif - // Always attempt to do proper cleanup to avoid inevitable memory corruption - // in case of missing wait (for the sake of better testability & debuggability) - if (!m_context.is_group_execution_cancelled()) - cancel(); - d1::wait(m_wait_ctx, m_context); - if (!stack_unwinding_in_progress) - throw_exception(exception_id::missing_wait); - } - } - - task_group_status wait() { - bool cancellation_status = false; - try_call([&] { - d1::wait(m_wait_ctx, m_context); - }).on_completion([&] { - // TODO: the reset method is not thread-safe. Ensure the correct behavior. - cancellation_status = m_context.is_group_execution_cancelled(); - m_context.reset(); - }); - return cancellation_status ? 
canceled : complete; - } - - void cancel() { - m_context.cancel_group_execution(); - } -}; // class task_group_base - -class task_group : public task_group_base { -public: - task_group() : task_group_base(task_group_context::concurrent_wait) {} - - template<typename F> - void run(F&& f) { - spawn(*prepare_task(std::forward<F>(f)), m_context); - } - - template<typename F> - task_group_status run_and_wait(const F& f) { - return internal_run_and_wait(f); - } -}; // class task_group - -#if TBB_PREVIEW_ISOLATED_TASK_GROUP -class spawn_delegate : public delegate_base { - task* task_to_spawn; - task_group_context& context; - bool operator()() const override { - spawn(*task_to_spawn, context); - return true; - } -public: - spawn_delegate(task* a_task, task_group_context& ctx) - : task_to_spawn(a_task), context(ctx) - {} -}; - -class wait_delegate : public delegate_base { - bool operator()() const override { - status = tg.wait(); - return true; - } -protected: - task_group& tg; - task_group_status& status; -public: - wait_delegate(task_group& a_group, task_group_status& tgs) - : tg(a_group), status(tgs) {} -}; - -template<typename F> -class run_wait_delegate : public wait_delegate { - F& func; - bool operator()() const override { - status = tg.run_and_wait(func); - return true; - } -public: - run_wait_delegate(task_group& a_group, F& a_func, task_group_status& tgs) - : wait_delegate(a_group, tgs), func(a_func) {} -}; - -class isolated_task_group : public task_group { - intptr_t this_isolation() { - return reinterpret_cast<intptr_t>(this); - } -public: - isolated_task_group () : task_group() {} - - template<typename F> - void run(F&& f) { - spawn_delegate sd(prepare_task(std::forward<F>(f)), m_context); - r1::isolate_within_arena(sd, this_isolation()); - } - - template<typename F> - task_group_status run_and_wait( const F& f ) { - task_group_status result = not_complete; - run_wait_delegate<const F> rwd(*this, f, result); - r1::isolate_within_arena(rwd, this_isolation()); - __TBB_ASSERT(result != not_complete, "premature exit from wait?"); - return result; - } - - task_group_status wait() { - task_group_status result = not_complete; - wait_delegate wd(*this, result); - r1::isolate_within_arena(wd, this_isolation()); - __TBB_ASSERT(result != not_complete, "premature exit from wait?"); - return result; - } -}; // class isolated_task_group -#endif // TBB_PREVIEW_ISOLATED_TASK_GROUP - -inline bool is_current_task_group_canceling() { - task_group_context* ctx = current_context(); - return ctx ? ctx->is_group_execution_cancelled() : false; -} - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::task_group_context; -using detail::d1::task_group; -#if TBB_PREVIEW_ISOLATED_TASK_GROUP -using detail::d1::isolated_task_group; -#endif - -using detail::d1::task_group_status; -using detail::d1::not_complete; -using detail::d1::complete; -using detail::d1::canceled; - -using detail::d1::is_current_task_group_canceling; -using detail::r1::missing_wait; -} - -} // namespace tbb - -#if _MSC_VER && !defined(__INTEL_COMPILER) - #pragma warning(pop) // 4324 warning -#endif - -#endif // __TBB_task_group_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_task_group_H +#define __TBB_task_group_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_template_helpers.h" +#include "detail/_utils.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" + +#include "profiling.h" + +#include <functional> + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning(push) + #pragma warning(disable:4324) +#endif + +namespace tbb { +namespace detail { + +namespace d1 { +class delegate_base; +class task_arena_base; +class task_group_context; +} + +namespace r1 { +// Forward declarations +class tbb_exception_ptr; +class market; +class thread_data; +class task_dispatcher; +template <bool> +class context_guard_helper; +struct task_arena_impl; + +void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); +void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base&, std::intptr_t); + +void __TBB_EXPORTED_FUNC initialize(d1::task_group_context&); +void __TBB_EXPORTED_FUNC destroy(d1::task_group_context&); +void __TBB_EXPORTED_FUNC reset(d1::task_group_context&); +bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context&); +bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context&); +void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context&); + +struct task_group_context_impl; +} + +namespace d1 { + +struct context_list_node { + std::atomic<context_list_node*> prev{}; + std::atomic<context_list_node*> next{}; + + void remove_relaxed() { + context_list_node* p = prev.load(std::memory_order_relaxed); + context_list_node* n = next.load(std::memory_order_relaxed); + p->next.store(n, std::memory_order_relaxed); + n->prev.store(p, std::memory_order_relaxed); + } +}; + +//! Used to form groups of tasks +/** @ingroup task_scheduling + The context services explicit cancellation requests from user code, and unhandled + exceptions intercepted during tasks execution. Intercepting an exception results + in generating internal cancellation requests (which is processed in exactly the + same way as external ones). + + The context is associated with one or more root tasks and defines the cancellation + group that includes all the descendants of the corresponding root task(s). Association + is established when a context object is passed as an argument to the task::allocate_root() + method. See task_group_context::task_group_context for more details. + + The context can be bound to another one, and other contexts can be bound to it, + forming a tree-like structure: parent -> this -> children. Arrows here designate + cancellation propagation direction. If a task in a cancellation group is cancelled + all the other tasks in this group and groups bound to it (as children) get cancelled too. +**/ +class task_group_context : no_copy { +public: + enum traits_type { + fp_settings = 1 << 1, + concurrent_wait = 1 << 2, + default_traits = 0 + }; + enum kind_type { + isolated, + bound + }; +private: + //! Space for platform-specific FPU settings. 
+ /** Must only be accessed inside TBB binaries, and never directly in user + code or inline methods. */ + std::uint64_t my_cpu_ctl_env; + + //! Specifies whether cancellation was requested for this task group. + std::atomic<std::uint32_t> my_cancellation_requested; + + //! Version for run-time checks and behavioral traits of the context. + std::uint8_t my_version; + + //! The context traits. + struct context_traits { + bool fp_settings : 1; + bool concurrent_wait : 1; + bool bound : 1; + } my_traits; + + static_assert(sizeof(context_traits) == 1, "Traits shall fit into one byte."); + + static constexpr std::uint8_t may_have_children = 1; + //! The context internal state (currently only may_have_children). + std::atomic<std::uint8_t> my_state; + + enum class lifetime_state : std::uint8_t { + created, + locked, + isolated, + bound, + detached, + dying + }; + + //! The synchronization machine state to manage lifetime. + std::atomic<lifetime_state> my_lifetime_state; + + //! Pointer to the context of the parent cancellation group. NULL for isolated contexts. + task_group_context* my_parent; + + //! Thread data instance that registered this context in its list. + std::atomic<r1::thread_data*> my_owner; + + //! Used to form the thread specific list of contexts without additional memory allocation. + /** A context is included into the list of the current thread when its binding to + its parent happens. Any context can be present in the list of one thread only. **/ + context_list_node my_node; + + //! Pointer to the container storing exception being propagated across this task group. + r1::tbb_exception_ptr* my_exception; + + //! Used to set and maintain stack stitching point for Intel Performance Tools. + void* my_itt_caller; + + //! Description of algorithm for scheduler based instrumentation. + string_resource_index my_name; + + char padding[max_nfs_size + - sizeof(std::uint64_t) // my_cpu_ctl_env + - sizeof(std::atomic<std::uint32_t>) // my_cancellation_requested + - sizeof(std::uint8_t) // my_version + - sizeof(context_traits) // my_traits + - sizeof(std::atomic<std::uint8_t>) // my_state + - sizeof(std::atomic<lifetime_state>) // my_lifetime_state + - sizeof(task_group_context*) // my_parent + - sizeof(std::atomic<r1::thread_data*>) // my_owner + - sizeof(context_list_node) // my_node + - sizeof(r1::tbb_exception_ptr*) // my_exception + - sizeof(void*) // my_itt_caller + - sizeof(string_resource_index) // my_name + ]; + + task_group_context(context_traits t, string_resource_index name) + : my_version{}, my_name{ name } { + my_traits = t; // GCC4.8 issues warning list initialization for bitset (missing-field-initializers) + r1::initialize(*this); + } + + static context_traits make_traits(kind_type relation_with_parent, std::uintptr_t user_traits) { + context_traits ct; + ct.bound = relation_with_parent == bound; + ct.fp_settings = (user_traits & fp_settings) == fp_settings; + ct.concurrent_wait = (user_traits & concurrent_wait) == concurrent_wait; + return ct; + } + +public: + //! Default & binding constructor. + /** By default a bound context is created. That is this context will be bound + (as child) to the context of the currently executing task . Cancellation + requests passed to the parent context are propagated to all the contexts + bound to it. Similarly priority change is propagated from the parent context + to its children. + + If task_group_context::isolated is used as the argument, then the tasks associated + with this context will never be affected by events in any other context. 
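For orientation while reading this hunk: the bound/isolated distinction documented in task_group_context is easiest to see from the caller's side. The sketch below is illustrative only; it assumes the parallel_for overload that accepts an explicit task_group_context (from oneapi/tbb/parallel_for.h), and the function name and buffer are invented for the example.

    #include <cstddef>
    #include <oneapi/tbb/blocked_range.h>
    #include <oneapi/tbb/parallel_for.h>
    #include <oneapi/tbb/task_group.h>   // task_group_context

    // Hypothetical entry point called directly from an external (non-TBB) thread.
    // An isolated context is not bound to a parent group, so cancellation of
    // unrelated TBB work never propagates into this loop, and startup is cheaper.
    void scale_buffer(float* data, std::size_t n) {
        tbb::task_group_context ctx(tbb::task_group_context::isolated);
        tbb::parallel_for(
            tbb::blocked_range<std::size_t>(0, n),
            [&](const tbb::blocked_range<std::size_t>& r) {
                for (std::size_t i = r.begin(); i != r.end(); ++i)
                    data[i] *= 2.0f;
            },
            ctx);
        // ctx.cancel_group_execution() from another thread would cancel only this loop.
    }

A bound context (the default) would instead inherit cancellation requests from the group of the task that created it.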
+ + Creating isolated contexts involve much less overhead, but they have limited + utility. Normally when an exception occurs in an algorithm that has nested + ones running, it is desirably to have all the nested algorithms cancelled + as well. Such a behavior requires nested algorithms to use bound contexts. + + There is one good place where using isolated algorithms is beneficial. It is + an external thread. That is if a particular algorithm is invoked directly from + the external thread (not from a TBB task), supplying it with explicitly + created isolated context will result in a faster algorithm startup. + + VERSIONING NOTE: + Implementation(s) of task_group_context constructor(s) cannot be made + entirely out-of-line because the run-time version must be set by the user + code. This will become critically important for binary compatibility, if + we ever have to change the size of the context object. **/ + + task_group_context(kind_type relation_with_parent = bound, + std::uintptr_t t = default_traits) + : task_group_context(make_traits(relation_with_parent, t), CUSTOM_CTX) {} + + // Custom constructor for instrumentation of oneTBB algorithm + task_group_context (string_resource_index name ) + : task_group_context(make_traits(bound, default_traits), name) {} + + // Do not introduce any logic on user side since it might break state propagation assumptions + ~task_group_context() { + r1::destroy(*this); + } + + //! Forcefully reinitializes the context after the task tree it was associated with is completed. + /** Because the method assumes that all the tasks that used to be associated with + this context have already finished, calling it while the context is still + in use somewhere in the task hierarchy leads to undefined behavior. + + IMPORTANT: This method is not thread safe! + + The method does not change the context's parent if it is set. **/ + void reset() { + r1::reset(*this); + } + + //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups. + /** \return false if cancellation has already been requested, true otherwise. + + Note that canceling never fails. When false is returned, it just means that + another thread (or this one) has already sent cancellation request to this + context or to one of its ancestors (if this context is bound). It is guaranteed + that when this method is concurrently called on the same not yet cancelled + context, true will be returned by one and only one invocation. **/ + bool cancel_group_execution() { + return r1::cancel_group_execution(*this); + } + + //! Returns true if the context received cancellation request. + bool is_group_execution_cancelled() { + return r1::is_group_execution_cancelled(*this); + } + +#if __TBB_FP_CONTEXT + //! Captures the current FPU control settings to the context. + /** Because the method assumes that all the tasks that used to be associated with + this context have already finished, calling it while the context is still + in use somewhere in the task hierarchy leads to undefined behavior. + + IMPORTANT: This method is not thread safe! + + The method does not change the FPU control settings of the context's parent. **/ + void capture_fp_settings() { + r1::capture_fp_settings(*this); + } +#endif + + //! Returns the user visible context trait + std::uintptr_t traits() const { + std::uintptr_t t{}; + t |= my_traits.fp_settings ? fp_settings : 0; + t |= my_traits.concurrent_wait ? 
concurrent_wait : 0; + return t; + } +private: + //// TODO: cleanup friends + friend class r1::market; + friend class r1::thread_data; + friend class r1::task_dispatcher; + template <bool> + friend class r1::context_guard_helper; + friend struct r1::task_arena_impl; + friend struct r1::task_group_context_impl; +}; // class task_group_context + +static_assert(sizeof(task_group_context) == 128, "Wrong size of task_group_context"); + +enum task_group_status { + not_complete, + complete, + canceled +}; + +class task_group; +class structured_task_group; +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +class isolated_task_group; +#endif + +template<typename F> +class function_task : public task { + const F m_func; + wait_context& m_wait_ctx; + small_object_allocator m_allocator; + + void finalize(const execution_data& ed) { + // Make a local reference not to access this after destruction. + wait_context& wo = m_wait_ctx; + // Copy allocator to the stack + auto allocator = m_allocator; + // Destroy user functor before release wait. + this->~function_task(); + wo.release(); + + allocator.deallocate(this, ed); + } + task* execute(execution_data& ed) override { + m_func(); + finalize(ed); + return nullptr; + } + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } +public: + function_task(const F& f, wait_context& wo, small_object_allocator& alloc) + : m_func(f) + , m_wait_ctx(wo) + , m_allocator(alloc) {} + + function_task(F&& f, wait_context& wo, small_object_allocator& alloc) + : m_func(std::move(f)) + , m_wait_ctx(wo) + , m_allocator(alloc) {} +}; + +template <typename F> +class function_stack_task : public task { + const F& m_func; + wait_context& m_wait_ctx; + + void finalize() { + m_wait_ctx.release(); + } + task* execute(execution_data&) override { + m_func(); + finalize(); + return nullptr; + } + task* cancel(execution_data&) override { + finalize(); + return nullptr; + } +public: + function_stack_task(const F& f, wait_context& wo) : m_func(f), m_wait_ctx(wo) {} +}; + +class task_group_base : no_copy { +protected: + wait_context m_wait_ctx; + task_group_context m_context; + + template<typename F> + task_group_status internal_run_and_wait(const F& f) { + function_stack_task<F> t{ f, m_wait_ctx }; + m_wait_ctx.reserve(); + bool cancellation_status = false; + try_call([&] { + execute_and_wait(t, m_context, m_wait_ctx, m_context); + }).on_completion([&] { + // TODO: the reset method is not thread-safe. Ensure the correct behavior. + cancellation_status = m_context.is_group_execution_cancelled(); + m_context.reset(); + }); + return cancellation_status ? 
canceled : complete; + } + + template<typename F> + task* prepare_task(F&& f) { + m_wait_ctx.reserve(); + small_object_allocator alloc{}; + return alloc.new_object<function_task<typename std::decay<F>::type>>(std::forward<F>(f), m_wait_ctx, alloc); + } + +public: + task_group_base(uintptr_t traits = 0) + : m_wait_ctx(0) + , m_context(task_group_context::bound, task_group_context::default_traits | traits) + { + } + + ~task_group_base() noexcept(false) { + if (m_wait_ctx.continue_execution()) { +#if __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT + bool stack_unwinding_in_progress = std::uncaught_exceptions() > 0; +#else + bool stack_unwinding_in_progress = std::uncaught_exception(); +#endif + // Always attempt to do proper cleanup to avoid inevitable memory corruption + // in case of missing wait (for the sake of better testability & debuggability) + if (!m_context.is_group_execution_cancelled()) + cancel(); + d1::wait(m_wait_ctx, m_context); + if (!stack_unwinding_in_progress) + throw_exception(exception_id::missing_wait); + } + } + + task_group_status wait() { + bool cancellation_status = false; + try_call([&] { + d1::wait(m_wait_ctx, m_context); + }).on_completion([&] { + // TODO: the reset method is not thread-safe. Ensure the correct behavior. + cancellation_status = m_context.is_group_execution_cancelled(); + m_context.reset(); + }); + return cancellation_status ? canceled : complete; + } + + void cancel() { + m_context.cancel_group_execution(); + } +}; // class task_group_base + +class task_group : public task_group_base { +public: + task_group() : task_group_base(task_group_context::concurrent_wait) {} + + template<typename F> + void run(F&& f) { + spawn(*prepare_task(std::forward<F>(f)), m_context); + } + + template<typename F> + task_group_status run_and_wait(const F& f) { + return internal_run_and_wait(f); + } +}; // class task_group + +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +class spawn_delegate : public delegate_base { + task* task_to_spawn; + task_group_context& context; + bool operator()() const override { + spawn(*task_to_spawn, context); + return true; + } +public: + spawn_delegate(task* a_task, task_group_context& ctx) + : task_to_spawn(a_task), context(ctx) + {} +}; + +class wait_delegate : public delegate_base { + bool operator()() const override { + status = tg.wait(); + return true; + } +protected: + task_group& tg; + task_group_status& status; +public: + wait_delegate(task_group& a_group, task_group_status& tgs) + : tg(a_group), status(tgs) {} +}; + +template<typename F> +class run_wait_delegate : public wait_delegate { + F& func; + bool operator()() const override { + status = tg.run_and_wait(func); + return true; + } +public: + run_wait_delegate(task_group& a_group, F& a_func, task_group_status& tgs) + : wait_delegate(a_group, tgs), func(a_func) {} +}; + +class isolated_task_group : public task_group { + intptr_t this_isolation() { + return reinterpret_cast<intptr_t>(this); + } +public: + isolated_task_group () : task_group() {} + + template<typename F> + void run(F&& f) { + spawn_delegate sd(prepare_task(std::forward<F>(f)), m_context); + r1::isolate_within_arena(sd, this_isolation()); + } + + template<typename F> + task_group_status run_and_wait( const F& f ) { + task_group_status result = not_complete; + run_wait_delegate<const F> rwd(*this, f, result); + r1::isolate_within_arena(rwd, this_isolation()); + __TBB_ASSERT(result != not_complete, "premature exit from wait?"); + return result; + } + + task_group_status wait() { + task_group_status result = not_complete; + 
wait_delegate wd(*this, result); + r1::isolate_within_arena(wd, this_isolation()); + __TBB_ASSERT(result != not_complete, "premature exit from wait?"); + return result; + } +}; // class isolated_task_group +#endif // TBB_PREVIEW_ISOLATED_TASK_GROUP + +inline bool is_current_task_group_canceling() { + task_group_context* ctx = current_context(); + return ctx ? ctx->is_group_execution_cancelled() : false; +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::task_group_context; +using detail::d1::task_group; +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +using detail::d1::isolated_task_group; +#endif + +using detail::d1::task_group_status; +using detail::d1::not_complete; +using detail::d1::complete; +using detail::d1::canceled; + +using detail::d1::is_current_task_group_canceling; +using detail::r1::missing_wait; +} + +} // namespace tbb + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning(pop) // 4324 warning +#endif + +#endif // __TBB_task_group_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h b/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h index 276ca70707..0f6f25f124 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h @@ -1,116 +1,116 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_task_scheduler_observer_H -#define __TBB_task_scheduler_observer_H - -#include "detail/_namespace_injection.h" -#include "task_arena.h" -#include <atomic> - -namespace tbb { -namespace detail { - -namespace d1 { -class task_scheduler_observer; -} - -namespace r1 { -class observer_proxy; -class observer_list; - -//! Enable or disable observation -/** For local observers the method can be used only when the current thread -has the task scheduler initialized or is attached to an arena. -Repeated calls with the same state are no-ops. **/ -void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool state = true); -} - -namespace d1 { -class task_scheduler_observer { - friend class r1::observer_proxy; - friend class r1::observer_list; - friend void r1::observe(d1::task_scheduler_observer&, bool); - - //! Pointer to the proxy holding this observer. - /** Observers are proxied by the scheduler to maintain persistent lists of them. **/ - std::atomic<r1::observer_proxy*> my_proxy{ nullptr }; - - //! Counter preventing the observer from being destroyed while in use by the scheduler. - /** Valid only when observation is on. **/ - std::atomic<intptr_t> my_busy_count{ 0 }; - - //! Contains task_arena pointer - task_arena* my_task_arena{ nullptr }; -public: - //! Returns true if observation is enabled, false otherwise. - bool is_observing() const { return my_proxy.load(std::memory_order_relaxed) != nullptr; } - - //! Entry notification - /** Invoked from inside observe(true) call and whenever a worker enters the arena - this observer is associated with. 
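With the full task_group interface restored above, its intended usage pattern is short enough to sketch here; this is the conventional fork/join idiom, not code from this commit, and the cutoff value is arbitrary.

    #include <oneapi/tbb/task_group.h>

    int parallel_fib(int n) {
        if (n < 16) {                       // small problems computed serially
            int a = 0, b = 1;
            for (int i = 0; i < n; ++i) { int t = a + b; a = b; b = t; }
            return a;
        }
        int x = 0, y = 0;
        tbb::task_group g;
        g.run([&] { x = parallel_fib(n - 1); });           // spawned into the group
        g.run_and_wait([&] { y = parallel_fib(n - 2); });  // run here, then wait for the group
        return x + y;
    }

Both wait() and run_and_wait() report a task_group_status, so a caller can distinguish complete from canceled when cancellation is in play.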
If a thread is already in the arena when - the observer is activated, the entry notification is called before it - executes the first stolen task. **/ - virtual void on_scheduler_entry( bool /*is_worker*/ ) {} - - //! Exit notification - /** Invoked from inside observe(false) call and whenever a worker leaves the - arena this observer is associated with. **/ - virtual void on_scheduler_exit( bool /*is_worker*/ ) {} - - //! Construct local or global observer in inactive state (observation disabled). - /** For a local observer entry/exit notifications are invoked whenever a worker - thread joins/leaves the arena of the observer's owner thread. If a thread is - already in the arena when the observer is activated, the entry notification is - called before it executes the first stolen task. **/ - explicit task_scheduler_observer() = default; - - //! Construct local observer for a given arena in inactive state (observation disabled). - /** entry/exit notifications are invoked whenever a thread joins/leaves arena. - If a thread is already in the arena when the observer is activated, the entry notification - is called before it executes the first stolen task. **/ - explicit task_scheduler_observer(task_arena& a) : my_task_arena(&a) {} - - /** Destructor protects instance of the observer from concurrent notification. - It is recommended to disable observation before destructor of a derived class starts, - otherwise it can lead to concurrent notification callback on partly destroyed object **/ - virtual ~task_scheduler_observer() { - if (my_proxy.load(std::memory_order_relaxed)) { - observe(false); - } - } - - //! Enable or disable observation - /** Warning: concurrent invocations of this method are not safe. - Repeated calls with the same state are no-ops. **/ - void observe(bool state = true) { - if( state && !my_proxy.load(std::memory_order_relaxed) ) { - __TBB_ASSERT( my_busy_count.load(std::memory_order_relaxed) == 0, "Inconsistent state of task_scheduler_observer instance"); - } - r1::observe(*this, state); - } -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::task_scheduler_observer; -} -} // namespace tbb - - -#endif /* __TBB_task_scheduler_observer_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_task_scheduler_observer_H +#define __TBB_task_scheduler_observer_H + +#include "detail/_namespace_injection.h" +#include "task_arena.h" +#include <atomic> + +namespace tbb { +namespace detail { + +namespace d1 { +class task_scheduler_observer; +} + +namespace r1 { +class observer_proxy; +class observer_list; + +//! Enable or disable observation +/** For local observers the method can be used only when the current thread +has the task scheduler initialized or is attached to an arena. +Repeated calls with the same state are no-ops. 
**/ +void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool state = true); +} + +namespace d1 { +class task_scheduler_observer { + friend class r1::observer_proxy; + friend class r1::observer_list; + friend void r1::observe(d1::task_scheduler_observer&, bool); + + //! Pointer to the proxy holding this observer. + /** Observers are proxied by the scheduler to maintain persistent lists of them. **/ + std::atomic<r1::observer_proxy*> my_proxy{ nullptr }; + + //! Counter preventing the observer from being destroyed while in use by the scheduler. + /** Valid only when observation is on. **/ + std::atomic<intptr_t> my_busy_count{ 0 }; + + //! Contains task_arena pointer + task_arena* my_task_arena{ nullptr }; +public: + //! Returns true if observation is enabled, false otherwise. + bool is_observing() const { return my_proxy.load(std::memory_order_relaxed) != nullptr; } + + //! Entry notification + /** Invoked from inside observe(true) call and whenever a worker enters the arena + this observer is associated with. If a thread is already in the arena when + the observer is activated, the entry notification is called before it + executes the first stolen task. **/ + virtual void on_scheduler_entry( bool /*is_worker*/ ) {} + + //! Exit notification + /** Invoked from inside observe(false) call and whenever a worker leaves the + arena this observer is associated with. **/ + virtual void on_scheduler_exit( bool /*is_worker*/ ) {} + + //! Construct local or global observer in inactive state (observation disabled). + /** For a local observer entry/exit notifications are invoked whenever a worker + thread joins/leaves the arena of the observer's owner thread. If a thread is + already in the arena when the observer is activated, the entry notification is + called before it executes the first stolen task. **/ + explicit task_scheduler_observer() = default; + + //! Construct local observer for a given arena in inactive state (observation disabled). + /** entry/exit notifications are invoked whenever a thread joins/leaves arena. + If a thread is already in the arena when the observer is activated, the entry notification + is called before it executes the first stolen task. **/ + explicit task_scheduler_observer(task_arena& a) : my_task_arena(&a) {} + + /** Destructor protects instance of the observer from concurrent notification. + It is recommended to disable observation before destructor of a derived class starts, + otherwise it can lead to concurrent notification callback on partly destroyed object **/ + virtual ~task_scheduler_observer() { + if (my_proxy.load(std::memory_order_relaxed)) { + observe(false); + } + } + + //! Enable or disable observation + /** Warning: concurrent invocations of this method are not safe. + Repeated calls with the same state are no-ops. 
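To make the entry/exit hooks concrete, here is a minimal observer sketch. The class name and counter are invented for illustration; enabling observation in the constructor and disabling it before the derived object is torn down follows the recommendation in this header's comments.

    #include <atomic>
    #include <oneapi/tbb/task_arena.h>
    #include <oneapi/tbb/task_scheduler_observer.h>

    // Counts how many times threads enter the observed arena.
    class entry_counter : public tbb::task_scheduler_observer {
        std::atomic<int> m_entries{0};
    public:
        explicit entry_counter(tbb::task_arena& a)
            : tbb::task_scheduler_observer(a) {
            observe(true);                   // start receiving notifications
        }
        ~entry_counter() override {
            observe(false);                  // stop before members are destroyed
        }
        void on_scheduler_entry(bool /*is_worker*/) override { ++m_entries; }
        int entries() const { return m_entries.load(); }
    };

Typical use would be: construct a tbb::task_arena, construct the observer over it, run work via arena.execute(), then read entries().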
**/ + void observe(bool state = true) { + if( state && !my_proxy.load(std::memory_order_relaxed) ) { + __TBB_ASSERT( my_busy_count.load(std::memory_order_relaxed) == 0, "Inconsistent state of task_scheduler_observer instance"); + } + r1::observe(*this, state); + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::task_scheduler_observer; +} +} // namespace tbb + + +#endif /* __TBB_task_scheduler_observer_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h index 3da61a009d..1018a15793 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h @@ -1,126 +1,126 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_tbb_allocator_H -#define __TBB_tbb_allocator_H - -#include "oneapi/tbb/detail/_utils.h" -#include "detail/_namespace_injection.h" -#include <cstdlib> -#include <utility> - -#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT -#error #include <memory_resource> -#endif - -namespace tbb { -namespace detail { - -namespace r1 { -void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size); -void __TBB_EXPORTED_FUNC deallocate_memory(void* p); -bool __TBB_EXPORTED_FUNC is_tbbmalloc_used(); -} - -namespace d1 { - -template<typename T> -class tbb_allocator { -public: - using value_type = T; - using propagate_on_container_move_assignment = std::true_type; - - //! Always defined for TBB containers (supported since C++17 for std containers) - using is_always_equal = std::true_type; - - //! Specifies current allocator - enum malloc_type { - scalable, - standard - }; - - tbb_allocator() = default; - template<typename U> tbb_allocator(const tbb_allocator<U>&) noexcept {} - - //! Allocate space for n objects. - __TBB_nodiscard T* allocate(std::size_t n) { - return static_cast<T*>(r1::allocate_memory(n * sizeof(value_type))); - } - - //! Free previously allocated block of memory. - void deallocate(T* p, std::size_t) { - r1::deallocate_memory(p); - } - - //! Returns current allocator - static malloc_type allocator_type() { - return r1::is_tbbmalloc_used() ? standard : scalable; - } - -#if TBB_ALLOCATOR_TRAITS_BROKEN - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using difference_type = std::ptrdiff_t; - using size_type = std::size_t; - template<typename U> struct rebind { - using other = tbb_allocator<U>; - }; - //! Largest value for which method allocate might succeed. - size_type max_size() const noexcept { - size_type max = ~(std::size_t(0)) / sizeof(value_type); - return (max > 0 ? max : 1); - } - template<typename U, typename... Args> - void construct(U *p, Args&&... 
args) - { ::new (p) U(std::forward<Args>(args)...); } - void destroy( pointer p ) { p->~value_type(); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } -#endif // TBB_ALLOCATOR_TRAITS_BROKEN -}; - -#if TBB_ALLOCATOR_TRAITS_BROKEN - template<> - class tbb_allocator<void> { - public: - using pointer = void*; - using const_pointer = const void*; - using value_type = void; - template<typename U> struct rebind { - using other = tbb_allocator<U>; - }; - }; -#endif - -template<typename T, typename U> -inline bool operator==(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return true; } - -#if !__TBB_CPP20_COMPARISONS_PRESENT -template<typename T, typename U> -inline bool operator!=(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return false; } -#endif - -} // namespace d1 -} // namespace detail - -inline namespace v1 { -using detail::d1::tbb_allocator; -} // namespace v1 -} // namespace tbb - -#endif /* __TBB_tbb_allocator_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tbb_allocator_H +#define __TBB_tbb_allocator_H + +#include "oneapi/tbb/detail/_utils.h" +#include "detail/_namespace_injection.h" +#include <cstdlib> +#include <utility> + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size); +void __TBB_EXPORTED_FUNC deallocate_memory(void* p); +bool __TBB_EXPORTED_FUNC is_tbbmalloc_used(); +} + +namespace d1 { + +template<typename T> +class tbb_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers (supported since C++17 for std containers) + using is_always_equal = std::true_type; + + //! Specifies current allocator + enum malloc_type { + scalable, + standard + }; + + tbb_allocator() = default; + template<typename U> tbb_allocator(const tbb_allocator<U>&) noexcept {} + + //! Allocate space for n objects. + __TBB_nodiscard T* allocate(std::size_t n) { + return static_cast<T*>(r1::allocate_memory(n * sizeof(value_type))); + } + + //! Free previously allocated block of memory. + void deallocate(T* p, std::size_t) { + r1::deallocate_memory(p); + } + + //! Returns current allocator + static malloc_type allocator_type() { + return r1::is_tbbmalloc_used() ? standard : scalable; + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = tbb_allocator<U>; + }; + //! Largest value for which method allocate might succeed. + size_type max_size() const noexcept { + size_type max = ~(std::size_t(0)) / sizeof(value_type); + return (max > 0 ? 
max : 1); + } + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new (p) U(std::forward<Args>(args)...); } + void destroy( pointer p ) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class tbb_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = tbb_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +inline bool operator==(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +inline bool operator!=(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return false; } +#endif + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::tbb_allocator; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_tbb_allocator_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h b/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h index 0ba38f215e..bb7c143ee5 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h +++ b/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h @@ -1,65 +1,65 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -/* -Replacing the standard memory allocation routines in Microsoft* C/C++ RTL -(malloc/free, global new/delete, etc.) with the TBB memory allocator. - -Include the following header to a source of any binary which is loaded during -application startup - -#include "oneapi/tbb/tbbmalloc_proxy.h" - -or add following parameters to the linker options for the binary which is -loaded during application startup. It can be either exe-file or dll. - -For win32 -tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy" -win64 -tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy" -*/ - -#ifndef __TBB_tbbmalloc_proxy_H -#define __TBB_tbbmalloc_proxy_H - -#if _MSC_VER - -#ifdef _DEBUG - #pragma comment(lib, "tbbmalloc_proxy_debug.lib") -#else - #pragma comment(lib, "tbbmalloc_proxy.lib") -#endif - -#if defined(_WIN64) - #pragma comment(linker, "/include:__TBB_malloc_proxy") -#else - #pragma comment(linker, "/include:___TBB_malloc_proxy") -#endif - -#else -/* Primarily to support MinGW */ - -extern "C" void __TBB_malloc_proxy(); -struct __TBB_malloc_proxy_caller { - __TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); } -} volatile __TBB_malloc_proxy_helper_object; - -#endif // _MSC_VER - -/* Public Windows API */ -extern "C" int TBB_malloc_replacement_log(char *** function_replacement_log_ptr); - -#endif //__TBB_tbbmalloc_proxy_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
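Since the tbb_allocator template restored just above is a standard-conforming allocator, dropping it into an STL container is the typical use; a minimal sketch, relying only on the documented fallback behavior of this header:

    #include <vector>
    #include <oneapi/tbb/tbb_allocator.h>

    int main() {
        // A std::vector whose storage comes through tbb_allocator: it uses the
        // scalable tbbmalloc library when that is loaded, and otherwise falls
        // back to the standard allocation routines.
        std::vector<int, tbb::tbb_allocator<int>> v;
        v.reserve(1024);
        for (int i = 0; i < 1024; ++i) v.push_back(i);
        return v.back() == 1023 ? 0 : 1;
    }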
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* +Replacing the standard memory allocation routines in Microsoft* C/C++ RTL +(malloc/free, global new/delete, etc.) with the TBB memory allocator. + +Include the following header to a source of any binary which is loaded during +application startup + +#include "oneapi/tbb/tbbmalloc_proxy.h" + +or add following parameters to the linker options for the binary which is +loaded during application startup. It can be either exe-file or dll. + +For win32 +tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy" +win64 +tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy" +*/ + +#ifndef __TBB_tbbmalloc_proxy_H +#define __TBB_tbbmalloc_proxy_H + +#if _MSC_VER + +#ifdef _DEBUG + #pragma comment(lib, "tbbmalloc_proxy_debug.lib") +#else + #pragma comment(lib, "tbbmalloc_proxy.lib") +#endif + +#if defined(_WIN64) + #pragma comment(linker, "/include:__TBB_malloc_proxy") +#else + #pragma comment(linker, "/include:___TBB_malloc_proxy") +#endif + +#else +/* Primarily to support MinGW */ + +extern "C" void __TBB_malloc_proxy(); +struct __TBB_malloc_proxy_caller { + __TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); } +} volatile __TBB_malloc_proxy_helper_object; + +#endif // _MSC_VER + +/* Public Windows API */ +extern "C" int TBB_malloc_replacement_log(char *** function_replacement_log_ptr); + +#endif //__TBB_tbbmalloc_proxy_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/tick_count.h b/contrib/libs/tbb/include/oneapi/tbb/tick_count.h index 2caa56ba18..96fbf3d5d1 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/tick_count.h +++ b/contrib/libs/tbb/include/oneapi/tbb/tick_count.h @@ -1,99 +1,99 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_tick_count_H -#define __TBB_tick_count_H - -#include <chrono> - -#include "detail/_namespace_injection.h" - -namespace tbb { -namespace detail { -namespace d1 { - - -//! Absolute timestamp -/** @ingroup timing */ -class tick_count { -public: - using clock_type = typename std::conditional<std::chrono::high_resolution_clock::is_steady, - std::chrono::high_resolution_clock, std::chrono::steady_clock>::type; - - //! Relative time interval. - class interval_t : public clock_type::duration { - public: - //! Construct a time interval representing zero time duration - interval_t() : clock_type::duration(clock_type::duration::zero()) {} - - //! Construct a time interval representing sec seconds time duration - explicit interval_t( double sec ) - : clock_type::duration(std::chrono::duration_cast<clock_type::duration>(std::chrono::duration<double>(sec))) {} - - //! 
Return the length of a time interval in seconds - double seconds() const { - return std::chrono::duration_cast<std::chrono::duration<double>>(*this).count(); - } - - //! Extract the intervals from the tick_counts and subtract them. - friend interval_t operator-( const tick_count& t1, const tick_count& t0 ); - - //! Add two intervals. - friend interval_t operator+( const interval_t& i, const interval_t& j ) { - return interval_t(std::chrono::operator+(i, j)); - } - - //! Subtract two intervals. - friend interval_t operator-( const interval_t& i, const interval_t& j ) { - return interval_t(std::chrono::operator-(i, j)); - } - - private: - explicit interval_t( clock_type::duration value_ ) : clock_type::duration(value_) {} - }; - - tick_count() = default; - - //! Return current time. - static tick_count now() { - return clock_type::now(); - } - - //! Subtract two timestamps to get the time interval between - friend interval_t operator-( const tick_count& t1, const tick_count& t0 ) { - return tick_count::interval_t(t1.my_time_point - t0.my_time_point); - } - - //! Return the resolution of the clock in seconds per tick. - static double resolution() { - return static_cast<double>(interval_t::period::num) / interval_t::period::den; - } - -private: - clock_type::time_point my_time_point; - tick_count( clock_type::time_point tp ) : my_time_point(tp) {} -}; - -} // namespace d1 -} // namespace detail - -inline namespace v1 { - using detail::d1::tick_count; -} // namespace v1 - -} // namespace tbb - -#endif /* __TBB_tick_count_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tick_count_H +#define __TBB_tick_count_H + +#include <chrono> + +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + + +//! Absolute timestamp +/** @ingroup timing */ +class tick_count { +public: + using clock_type = typename std::conditional<std::chrono::high_resolution_clock::is_steady, + std::chrono::high_resolution_clock, std::chrono::steady_clock>::type; + + //! Relative time interval. + class interval_t : public clock_type::duration { + public: + //! Construct a time interval representing zero time duration + interval_t() : clock_type::duration(clock_type::duration::zero()) {} + + //! Construct a time interval representing sec seconds time duration + explicit interval_t( double sec ) + : clock_type::duration(std::chrono::duration_cast<clock_type::duration>(std::chrono::duration<double>(sec))) {} + + //! Return the length of a time interval in seconds + double seconds() const { + return std::chrono::duration_cast<std::chrono::duration<double>>(*this).count(); + } + + //! Extract the intervals from the tick_counts and subtract them. + friend interval_t operator-( const tick_count& t1, const tick_count& t0 ); + + //! Add two intervals. + friend interval_t operator+( const interval_t& i, const interval_t& j ) { + return interval_t(std::chrono::operator+(i, j)); + } + + //! Subtract two intervals. 
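tick_count wraps the steady std::chrono clock chosen above, so timing a region takes two calls to now() and one subtraction. A small self-contained sketch (the timed loop is arbitrary filler):

    #include <cstdio>
    #include <oneapi/tbb/tick_count.h>

    int main() {
        tbb::tick_count t0 = tbb::tick_count::now();
        double acc = 0.0;
        for (int i = 0; i < 10000000; ++i) acc += i * 0.5;   // work being timed
        tbb::tick_count t1 = tbb::tick_count::now();
        tbb::tick_count::interval_t elapsed = t1 - t0;        // interval_t via operator-
        std::printf("acc=%g, elapsed=%.6f s (resolution %.2e s)\n",
                    acc, elapsed.seconds(), tbb::tick_count::resolution());
    }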
+ friend interval_t operator-( const interval_t& i, const interval_t& j ) { + return interval_t(std::chrono::operator-(i, j)); + } + + private: + explicit interval_t( clock_type::duration value_ ) : clock_type::duration(value_) {} + }; + + tick_count() = default; + + //! Return current time. + static tick_count now() { + return clock_type::now(); + } + + //! Subtract two timestamps to get the time interval between + friend interval_t operator-( const tick_count& t1, const tick_count& t0 ) { + return tick_count::interval_t(t1.my_time_point - t0.my_time_point); + } + + //! Return the resolution of the clock in seconds per tick. + static double resolution() { + return static_cast<double>(interval_t::period::num) / interval_t::period::den; + } + +private: + clock_type::time_point my_time_point; + tick_count( clock_type::time_point tp ) : my_time_point(tp) {} +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::tick_count; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_tick_count_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/version.h b/contrib/libs/tbb/include/oneapi/tbb/version.h index 1e3507cd9b..a791937df8 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/version.h +++ b/contrib/libs/tbb/include/oneapi/tbb/version.h @@ -1,108 +1,108 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_version_H -#define __TBB_version_H - -#include "detail/_config.h" -#include "detail/_namespace_injection.h" - -// Product version -#define TBB_VERSION_MAJOR 2021 -// Update version -#define TBB_VERSION_MINOR 2 -// "Patch" version for custom releases -#define TBB_VERSION_PATCH 0 -// Suffix string -#define __TBB_VERSION_SUFFIX "" -// Full official version string -#define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." __TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX - -// OneAPI oneTBB specification version -#define ONETBB_SPEC_VERSION "1.0" -// Full interface version -#define TBB_INTERFACE_VERSION 12020 -// Major interface version -#define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) -// Minor interface version -#define TBB_INTERFACE_VERSION_MINOR (TBB_INTERFACE_VERSION%1000/10) - -// The binary compatibility version -// To be used in SONAME, manifests, etc. -#define __TBB_BINARY_VERSION 12 - -//! 
TBB_VERSION support -#ifndef ENDL -#define ENDL "\n" -#endif - -//TBB_REVAMP_TODO: consider enabling version_string.ver generation -//TBB_REVAMP_TODO: #include "version_string.ver" - -#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION ENDL -#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING ENDL -#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) ENDL - -#ifndef TBB_USE_DEBUG - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" ENDL -#elif TBB_USE_DEBUG==0 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" ENDL -#elif TBB_USE_DEBUG==1 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" ENDL -#elif TBB_USE_DEBUG==2 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" ENDL -#else - #error Unexpected value for TBB_USE_DEBUG -#endif - -#ifndef TBB_USE_ASSERT - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" ENDL -#elif TBB_USE_ASSERT==0 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" ENDL -#elif TBB_USE_ASSERT==1 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" ENDL -#elif TBB_USE_ASSERT==2 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" ENDL -#else - #error Unexpected value for TBB_USE_ASSERT -#endif - -#define TBB_VERSION_STRINGS_P(N) \ - __TBB_ONETBB_SPEC_VERSION(N) \ - __TBB_VERSION_NUMBER(N) \ - __TBB_INTERFACE_VERSION_NUMBER(N) \ - __TBB_VERSION_USE_DEBUG(N) \ - __TBB_VERSION_USE_ASSERT(N) - -#define TBB_VERSION_STRINGS TBB_VERSION_STRINGS_P(oneTBB) -#define TBBMALLOC_VERSION_STRINGS TBB_VERSION_STRINGS_P(TBBmalloc) - -//! The function returns the version string for the Intel(R) oneAPI Threading Building Blocks (oneTBB) -//! shared library being used. -/** - * The returned pointer is an address of a string in the shared library. - * It can be different than the TBB_VERSION_STRING obtained at compile time. - */ -extern "C" const char* __TBB_EXPORTED_FUNC TBB_runtime_version(); - -//! The function returns the interface version of the oneTBB shared library being used. -/** - * The returned version is determined at runtime, not at compile/link time. - * It can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. - */ -extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); - -#endif // __TBB_version_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_version_H +#define __TBB_version_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +// Product version +#define TBB_VERSION_MAJOR 2021 +// Update version +#define TBB_VERSION_MINOR 2 +// "Patch" version for custom releases +#define TBB_VERSION_PATCH 0 +// Suffix string +#define __TBB_VERSION_SUFFIX "" +// Full official version string +#define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." 
__TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX + +// OneAPI oneTBB specification version +#define ONETBB_SPEC_VERSION "1.0" +// Full interface version +#define TBB_INTERFACE_VERSION 12020 +// Major interface version +#define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) +// Minor interface version +#define TBB_INTERFACE_VERSION_MINOR (TBB_INTERFACE_VERSION%1000/10) + +// The binary compatibility version +// To be used in SONAME, manifests, etc. +#define __TBB_BINARY_VERSION 12 + +//! TBB_VERSION support +#ifndef ENDL +#define ENDL "\n" +#endif + +//TBB_REVAMP_TODO: consider enabling version_string.ver generation +//TBB_REVAMP_TODO: #include "version_string.ver" + +#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION ENDL +#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING ENDL +#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) ENDL + +#ifndef TBB_USE_DEBUG + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" ENDL +#elif TBB_USE_DEBUG==0 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" ENDL +#elif TBB_USE_DEBUG==1 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" ENDL +#elif TBB_USE_DEBUG==2 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" ENDL +#else + #error Unexpected value for TBB_USE_DEBUG +#endif + +#ifndef TBB_USE_ASSERT + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" ENDL +#elif TBB_USE_ASSERT==0 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" ENDL +#elif TBB_USE_ASSERT==1 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" ENDL +#elif TBB_USE_ASSERT==2 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" ENDL +#else + #error Unexpected value for TBB_USE_ASSERT +#endif + +#define TBB_VERSION_STRINGS_P(N) \ + __TBB_ONETBB_SPEC_VERSION(N) \ + __TBB_VERSION_NUMBER(N) \ + __TBB_INTERFACE_VERSION_NUMBER(N) \ + __TBB_VERSION_USE_DEBUG(N) \ + __TBB_VERSION_USE_ASSERT(N) + +#define TBB_VERSION_STRINGS TBB_VERSION_STRINGS_P(oneTBB) +#define TBBMALLOC_VERSION_STRINGS TBB_VERSION_STRINGS_P(TBBmalloc) + +//! The function returns the version string for the Intel(R) oneAPI Threading Building Blocks (oneTBB) +//! shared library being used. +/** + * The returned pointer is an address of a string in the shared library. + * It can be different than the TBB_VERSION_STRING obtained at compile time. + */ +extern "C" const char* __TBB_EXPORTED_FUNC TBB_runtime_version(); + +//! The function returns the interface version of the oneTBB shared library being used. +/** + * The returned version is determined at runtime, not at compile/link time. + * It can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. + */ +extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); + +#endif // __TBB_version_H diff --git a/contrib/libs/tbb/include/tbb/blocked_range.h b/contrib/libs/tbb/include/tbb/blocked_range.h index 316ec01ba9..40b0d76261 100644 --- a/contrib/libs/tbb/include/tbb/blocked_range.h +++ b/contrib/libs/tbb/include/tbb/blocked_range.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
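The version macros and runtime query functions restored in oneapi/tbb/version.h above are commonly printed together to diagnose mismatches between the headers a program was compiled with and the shared library it loads at run time; a brief sketch:

    #include <cstdio>
    #include <oneapi/tbb/version.h>

    int main() {
        // Compile-time values come from the macros; run-time values come from
        // the loaded shared library and may differ after an upgrade.
        std::printf("compiled against: %s (interface %d)\n",
                    TBB_VERSION_STRING, TBB_INTERFACE_VERSION);
        std::printf("running against : %s (interface %d)\n",
                    TBB_runtime_version(), TBB_runtime_interface_version());
    }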
*/ -#include "../oneapi/tbb/blocked_range.h" +#include "../oneapi/tbb/blocked_range.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_range2d.h b/contrib/libs/tbb/include/tbb/blocked_range2d.h index 1e13240787..62b0de3886 100644 --- a/contrib/libs/tbb/include/tbb/blocked_range2d.h +++ b/contrib/libs/tbb/include/tbb/blocked_range2d.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/blocked_range2d.h" +#include "../oneapi/tbb/blocked_range2d.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_range3d.h b/contrib/libs/tbb/include/tbb/blocked_range3d.h index 3321979660..c1d949ed87 100644 --- a/contrib/libs/tbb/include/tbb/blocked_range3d.h +++ b/contrib/libs/tbb/include/tbb/blocked_range3d.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/blocked_range3d.h" +#include "../oneapi/tbb/blocked_range3d.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_rangeNd.h b/contrib/libs/tbb/include/tbb/blocked_rangeNd.h index 0c0fb7303a..654f617ff6 100644 --- a/contrib/libs/tbb/include/tbb/blocked_rangeNd.h +++ b/contrib/libs/tbb/include/tbb/blocked_rangeNd.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2017-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/blocked_rangeNd.h" +/* + Copyright (c) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/blocked_rangeNd.h" diff --git a/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h b/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h index 2d3c66a74a..c257a5a1da 100644 --- a/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h +++ b/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/cache_aligned_allocator.h" +#include "../oneapi/tbb/cache_aligned_allocator.h" diff --git a/contrib/libs/tbb/include/tbb/combinable.h b/contrib/libs/tbb/include/tbb/combinable.h index 50295ec72a..71b60e8c01 100644 --- a/contrib/libs/tbb/include/tbb/combinable.h +++ b/contrib/libs/tbb/include/tbb/combinable.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/combinable.h" +#include "../oneapi/tbb/combinable.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_hash_map.h b/contrib/libs/tbb/include/tbb/concurrent_hash_map.h index 68652c5961..4099be202b 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_hash_map.h +++ b/contrib/libs/tbb/include/tbb/concurrent_hash_map.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_hash_map.h" +#include "../oneapi/tbb/concurrent_hash_map.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h b/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h index 2757a234be..4c8cc6d579 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h +++ b/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_lru_cache.h" +#include "../oneapi/tbb/concurrent_lru_cache.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_map.h b/contrib/libs/tbb/include/tbb/concurrent_map.h index 84f59d7e66..3d5c3e80a5 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_map.h +++ b/contrib/libs/tbb/include/tbb/concurrent_map.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/concurrent_map.h" +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "../oneapi/tbb/concurrent_map.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h b/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h index 3b27130b1e..c2db1cac41 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h +++ b/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_priority_queue.h" +#include "../oneapi/tbb/concurrent_priority_queue.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_queue.h b/contrib/libs/tbb/include/tbb/concurrent_queue.h index d81a58b887..68580e5c7c 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_queue.h +++ b/contrib/libs/tbb/include/tbb/concurrent_queue.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_queue.h" +#include "../oneapi/tbb/concurrent_queue.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_set.h b/contrib/libs/tbb/include/tbb/concurrent_set.h index cf4652f597..f822ada28f 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_set.h +++ b/contrib/libs/tbb/include/tbb/concurrent_set.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/concurrent_set.h" +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/concurrent_set.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h b/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h index 9475c06cf3..8bd7b8d954 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h +++ b/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/concurrent_unordered_map.h" +#include "../oneapi/tbb/concurrent_unordered_map.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h b/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h index 81a8f9c37d..d8527b4fa5 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h +++ b/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_unordered_set.h" +#include "../oneapi/tbb/concurrent_unordered_set.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_vector.h b/contrib/libs/tbb/include/tbb/concurrent_vector.h index c1fc97c623..9e72476cc5 100644 --- a/contrib/libs/tbb/include/tbb/concurrent_vector.h +++ b/contrib/libs/tbb/include/tbb/concurrent_vector.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/concurrent_vector.h" +#include "../oneapi/tbb/concurrent_vector.h" diff --git a/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h b/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h index 9d6050d64f..d36e50038d 100644 --- a/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h +++ b/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/enumerable_thread_specific.h" +#include "../oneapi/tbb/enumerable_thread_specific.h" diff --git a/contrib/libs/tbb/include/tbb/flow_graph.h b/contrib/libs/tbb/include/tbb/flow_graph.h index 40da468fe0..b337ae80a3 100644 --- a/contrib/libs/tbb/include/tbb/flow_graph.h +++ b/contrib/libs/tbb/include/tbb/flow_graph.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/flow_graph.h" +#include "../oneapi/tbb/flow_graph.h" diff --git a/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h b/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h index cd9dc2967e..a24a4ea744 100644 --- a/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h +++ b/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/flow_graph_abstractions.h" +#include "../oneapi/tbb/flow_graph_abstractions.h" diff --git a/contrib/libs/tbb/include/tbb/global_control.h b/contrib/libs/tbb/include/tbb/global_control.h index 2688996ecb..0c87e9a051 100644 --- a/contrib/libs/tbb/include/tbb/global_control.h +++ b/contrib/libs/tbb/include/tbb/global_control.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/global_control.h" +#include "../oneapi/tbb/global_control.h" diff --git a/contrib/libs/tbb/include/tbb/info.h b/contrib/libs/tbb/include/tbb/info.h index 02d331650e..7a42b6ce6b 100644 --- a/contrib/libs/tbb/include/tbb/info.h +++ b/contrib/libs/tbb/include/tbb/info.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2019-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/info.h" +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/info.h" diff --git a/contrib/libs/tbb/include/tbb/memory_pool.h b/contrib/libs/tbb/include/tbb/memory_pool.h index cefe96e36d..3e971d4e0f 100644 --- a/contrib/libs/tbb/include/tbb/memory_pool.h +++ b/contrib/libs/tbb/include/tbb/memory_pool.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/memory_pool.h" +#include "../oneapi/tbb/memory_pool.h" diff --git a/contrib/libs/tbb/include/tbb/null_mutex.h b/contrib/libs/tbb/include/tbb/null_mutex.h index 63218bf061..8d1bb5263e 100644 --- a/contrib/libs/tbb/include/tbb/null_mutex.h +++ b/contrib/libs/tbb/include/tbb/null_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/null_mutex.h" +#include "../oneapi/tbb/null_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/null_rw_mutex.h b/contrib/libs/tbb/include/tbb/null_rw_mutex.h index 71c42fe26a..44c8f55983 100644 --- a/contrib/libs/tbb/include/tbb/null_rw_mutex.h +++ b/contrib/libs/tbb/include/tbb/null_rw_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/null_rw_mutex.h" +#include "../oneapi/tbb/null_rw_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_for.h b/contrib/libs/tbb/include/tbb/parallel_for.h index fea1d1b9f5..3403240263 100644 --- a/contrib/libs/tbb/include/tbb/parallel_for.h +++ b/contrib/libs/tbb/include/tbb/parallel_for.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_for.h" +#include "../oneapi/tbb/parallel_for.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_for_each.h b/contrib/libs/tbb/include/tbb/parallel_for_each.h index 27c2ab1727..d5e0c2d08a 100644 --- a/contrib/libs/tbb/include/tbb/parallel_for_each.h +++ b/contrib/libs/tbb/include/tbb/parallel_for_each.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_for_each.h" +#include "../oneapi/tbb/parallel_for_each.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_invoke.h b/contrib/libs/tbb/include/tbb/parallel_invoke.h index 6c21100e70..01f15cfe84 100644 --- a/contrib/libs/tbb/include/tbb/parallel_invoke.h +++ b/contrib/libs/tbb/include/tbb/parallel_invoke.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_invoke.h" +#include "../oneapi/tbb/parallel_invoke.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_pipeline.h b/contrib/libs/tbb/include/tbb/parallel_pipeline.h index aceee49f8a..8f8c821946 100644 --- a/contrib/libs/tbb/include/tbb/parallel_pipeline.h +++ b/contrib/libs/tbb/include/tbb/parallel_pipeline.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#include "../oneapi/tbb/parallel_pipeline.h" +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/parallel_pipeline.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_reduce.h b/contrib/libs/tbb/include/tbb/parallel_reduce.h index 83658755a8..c0161c8023 100644 --- a/contrib/libs/tbb/include/tbb/parallel_reduce.h +++ b/contrib/libs/tbb/include/tbb/parallel_reduce.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_reduce.h" +#include "../oneapi/tbb/parallel_reduce.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_scan.h b/contrib/libs/tbb/include/tbb/parallel_scan.h index 682032a561..f8dc1e5111 100644 --- a/contrib/libs/tbb/include/tbb/parallel_scan.h +++ b/contrib/libs/tbb/include/tbb/parallel_scan.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_scan.h" +#include "../oneapi/tbb/parallel_scan.h" diff --git a/contrib/libs/tbb/include/tbb/parallel_sort.h b/contrib/libs/tbb/include/tbb/parallel_sort.h index b238e6caa4..2eec4913d4 100644 --- a/contrib/libs/tbb/include/tbb/parallel_sort.h +++ b/contrib/libs/tbb/include/tbb/parallel_sort.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/parallel_sort.h" +#include "../oneapi/tbb/parallel_sort.h" diff --git a/contrib/libs/tbb/include/tbb/partitioner.h b/contrib/libs/tbb/include/tbb/partitioner.h index b959e35a2f..3cd0e32fc3 100644 --- a/contrib/libs/tbb/include/tbb/partitioner.h +++ b/contrib/libs/tbb/include/tbb/partitioner.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/partitioner.h" +#include "../oneapi/tbb/partitioner.h" diff --git a/contrib/libs/tbb/include/tbb/profiling.h b/contrib/libs/tbb/include/tbb/profiling.h index c7cea9c590..f4ebf88164 100644 --- a/contrib/libs/tbb/include/tbb/profiling.h +++ b/contrib/libs/tbb/include/tbb/profiling.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/profiling.h" +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/profiling.h" diff --git a/contrib/libs/tbb/include/tbb/queuing_mutex.h b/contrib/libs/tbb/include/tbb/queuing_mutex.h index ad031e4eb7..4766cf92c1 100644 --- a/contrib/libs/tbb/include/tbb/queuing_mutex.h +++ b/contrib/libs/tbb/include/tbb/queuing_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/queuing_mutex.h" +#include "../oneapi/tbb/queuing_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h b/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h index 203727ccc5..e4d4dd66f3 100644 --- a/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h +++ b/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/queuing_rw_mutex.h" +#include "../oneapi/tbb/queuing_rw_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/scalable_allocator.h b/contrib/libs/tbb/include/tbb/scalable_allocator.h index 5c654ebd68..d3d7b9b9db 100644 --- a/contrib/libs/tbb/include/tbb/scalable_allocator.h +++ b/contrib/libs/tbb/include/tbb/scalable_allocator.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/scalable_allocator.h" +#include "../oneapi/tbb/scalable_allocator.h" diff --git a/contrib/libs/tbb/include/tbb/spin_mutex.h b/contrib/libs/tbb/include/tbb/spin_mutex.h index 1a6f7f077f..a092d73edb 100644 --- a/contrib/libs/tbb/include/tbb/spin_mutex.h +++ b/contrib/libs/tbb/include/tbb/spin_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/spin_mutex.h" +#include "../oneapi/tbb/spin_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/spin_rw_mutex.h b/contrib/libs/tbb/include/tbb/spin_rw_mutex.h index d36282b486..a9a9685c20 100644 --- a/contrib/libs/tbb/include/tbb/spin_rw_mutex.h +++ b/contrib/libs/tbb/include/tbb/spin_rw_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/spin_rw_mutex.h" +#include "../oneapi/tbb/spin_rw_mutex.h" diff --git a/contrib/libs/tbb/include/tbb/task.h b/contrib/libs/tbb/include/tbb/task.h index 9be95b0d69..dae096777b 100644 --- a/contrib/libs/tbb/include/tbb/task.h +++ b/contrib/libs/tbb/include/tbb/task.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/task.h" +#include "../oneapi/tbb/task.h" diff --git a/contrib/libs/tbb/include/tbb/task_arena.h b/contrib/libs/tbb/include/tbb/task_arena.h index f6e34b3e6d..600e223089 100644 --- a/contrib/libs/tbb/include/tbb/task_arena.h +++ b/contrib/libs/tbb/include/tbb/task_arena.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/task_arena.h" +#include "../oneapi/tbb/task_arena.h" diff --git a/contrib/libs/tbb/include/tbb/task_group.h b/contrib/libs/tbb/include/tbb/task_group.h index 2f02503971..76e4ebb0ce 100644 --- a/contrib/libs/tbb/include/tbb/task_group.h +++ b/contrib/libs/tbb/include/tbb/task_group.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/task_group.h" +#include "../oneapi/tbb/task_group.h" diff --git a/contrib/libs/tbb/include/tbb/task_scheduler_observer.h b/contrib/libs/tbb/include/tbb/task_scheduler_observer.h index 9236f4cdf4..51740119e0 100644 --- a/contrib/libs/tbb/include/tbb/task_scheduler_observer.h +++ b/contrib/libs/tbb/include/tbb/task_scheduler_observer.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/task_scheduler_observer.h" +#include "../oneapi/tbb/task_scheduler_observer.h" diff --git a/contrib/libs/tbb/include/tbb/tbb.h b/contrib/libs/tbb/include/tbb/tbb.h index e443b8f1ca..a3383ace99 100644 --- a/contrib/libs/tbb/include/tbb/tbb.h +++ b/contrib/libs/tbb/include/tbb/tbb.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb.h" +#include "../oneapi/tbb.h" diff --git a/contrib/libs/tbb/include/tbb/tbb_allocator.h b/contrib/libs/tbb/include/tbb/tbb_allocator.h index 81ab9d33b5..16210cb8d3 100644 --- a/contrib/libs/tbb/include/tbb/tbb_allocator.h +++ b/contrib/libs/tbb/include/tbb/tbb_allocator.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/tbb_allocator.h" +#include "../oneapi/tbb/tbb_allocator.h" diff --git a/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h b/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h index 93eaa18e80..373ba3777a 100644 --- a/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h +++ b/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. */ -#include "../oneapi/tbb/tbbmalloc_proxy.h" +#include "../oneapi/tbb/tbbmalloc_proxy.h" diff --git a/contrib/libs/tbb/include/tbb/tick_count.h b/contrib/libs/tbb/include/tbb/tick_count.h index 170074aebb..f6f45ba52e 100644 --- a/contrib/libs/tbb/include/tbb/tick_count.h +++ b/contrib/libs/tbb/include/tbb/tick_count.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,4 +14,4 @@ limitations under the License. 
*/ -#include "../oneapi/tbb/tick_count.h" +#include "../oneapi/tbb/tick_count.h" diff --git a/contrib/libs/tbb/include/tbb/version.h b/contrib/libs/tbb/include/tbb/version.h index cd13a83a15..6d4d78ff46 100644 --- a/contrib/libs/tbb/include/tbb/version.h +++ b/contrib/libs/tbb/include/tbb/version.h @@ -1,17 +1,17 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "../oneapi/tbb/version.h" +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/version.h" |