author      robot-contrib <robot-contrib@yandex-team.com>    2022-09-17 22:03:44 +0300
committer   robot-contrib <robot-contrib@yandex-team.com>    2022-09-17 22:03:44 +0300
commit      49726f2627f20969d8e6358fc107bdf139c87f99 (patch)
tree        f94d610e293cb577fbf973b1d0ac07c005c948d0 /contrib/restricted/boost/move
parent      e4065899cddff8a7bde3bc774f33e9b6bebd961c (diff)
download    ydb-49726f2627f20969d8e6358fc107bdf139c87f99.tar.gz
Update contrib/restricted/boost/interprocess to 1.80.0
Diffstat (limited to 'contrib/restricted/boost/move')
-rw-r--r--   contrib/restricted/boost/move/include/boost/move/algo/adaptive_sort.hpp    654
-rw-r--r--   contrib/restricted/boost/move/include/boost/move/algo/detail/pdqsort.hpp   344
2 files changed, 998 insertions, 0 deletions
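
The diff below adds two standalone sort implementations to boost/move: adaptive_sort.hpp (an adaptive, stable merge sort that can exploit optional external raw storage) and detail/pdqsort.hpp (pattern-defeating quicksort, shipped as a detail header). As a quick orientation before reading the diff, here is a minimal usage sketch of the boost::movelib::adaptive_sort entry points declared in the new header; it is not part of the commit, and the container contents and buffer handling below are illustrative only.

#include <boost/move/algo/adaptive_sort.hpp>

#include <cstdio>
#include <functional>
#include <vector>

int main()
{
    std::vector<int> v{5, 3, 9, 1, 4, 8, 2, 7, 6, 0};

    // Comparator-only overload: stable sort, no extra storage supplied by the caller.
    boost::movelib::adaptive_sort(v.begin(), v.end(), std::less<int>());

    // Overload taking raw storage: the header documents best performance when
    // uninitialized_len is ceil(distance(first, last) / 2) elements of the value type.
    std::vector<int> w{5, 3, 9, 1, 4, 8, 2, 7, 6, 0};
    std::size_t const buf_len = (w.size() + 1) / 2;
    std::vector<int> buf(buf_len);          // stand-in for uninitialized raw storage
    boost::movelib::adaptive_sort(w.begin(), w.end(), std::less<int>(),
                                  buf.data(), buf_len);

    std::printf("%d %d\n", v.front(), w.back());
    return 0;
}
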
diff --git a/contrib/restricted/boost/move/include/boost/move/algo/adaptive_sort.hpp b/contrib/restricted/boost/move/include/boost/move/algo/adaptive_sort.hpp new file mode 100644 index 0000000000..d1aa883cb1 --- /dev/null +++ b/contrib/restricted/boost/move/include/boost/move/algo/adaptive_sort.hpp @@ -0,0 +1,654 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2015-2016. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// + +#ifndef BOOST_MOVE_ADAPTIVE_SORT_HPP +#define BOOST_MOVE_ADAPTIVE_SORT_HPP + +#include <boost/move/detail/config_begin.hpp> + +#include <boost/move/algo/detail/adaptive_sort_merge.hpp> +#include <boost/core/ignore_unused.hpp> + +#if defined(BOOST_CLANG) || (defined(BOOST_GCC) && (BOOST_GCC >= 40600)) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + +namespace boost { +namespace movelib { + +///@cond +namespace detail_adaptive { + +template<class RandIt> +void move_data_backward( RandIt cur_pos + , typename iter_size<RandIt>::type const l_data + , RandIt new_pos + , bool const xbuf_used) +{ + //Move buffer to the total combination right + if(xbuf_used){ + boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data); + } + else{ + boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data); + //Rotate does less moves but it seems slower due to cache issues + //rotate_gcd(first-l_block, first+len-l_block, first+len); + } +} + +template<class RandIt> +void move_data_forward( RandIt cur_pos + , typename iter_size<RandIt>::type const l_data + , RandIt new_pos + , bool const xbuf_used) +{ + //Move buffer to the total combination right + if(xbuf_used){ + boost::move(cur_pos, cur_pos+l_data, new_pos); + } + else{ + boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos); + //Rotate does less moves but it seems slower due to cache issues + //rotate_gcd(first-l_block, first+len-l_block, first+len); + } +} + +// build blocks of length 2*l_build_buf. l_build_buf is power of two +// input: [0, l_build_buf) elements are buffer, rest unsorted elements +// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted +// +// First elements are merged from right to left until elements start +// at first. All old elements [first, first + l_build_buf) are placed at the end +// [first+len-l_build_buf, first+len). To achieve this: +// - If we have external memory to merge, we save elements from the buffer +// so that a non-swapping merge is used. Buffer elements are restored +// at the end of the buffer from the external memory. +// +// - When the external memory is not available or it is insufficient +// for a merge operation, left swap merging is used. +// +// Once elements are merged left to right in blocks of l_build_buf, then a single left +// to right merge step is performed to achieve merged blocks of size 2K. +// If external memory is available, usual merge is used, swap merging otherwise. +// +// As a last step, if auxiliary memory is available in-place merge is performed. +// until all is merged or auxiliary memory is not large enough. 
+template<class RandIt, class Compare, class XBuf> +typename iter_size<RandIt>::type + adaptive_sort_build_blocks + ( RandIt const first + , typename iter_size<RandIt>::type const len + , typename iter_size<RandIt>::type const l_base + , typename iter_size<RandIt>::type const l_build_buf + , XBuf & xbuf + , Compare comp) +{ + typedef typename iter_size<RandIt>::type size_type; + BOOST_ASSERT(l_build_buf <= len); + BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1))); + + //Place the start pointer after the buffer + RandIt first_block = first + l_build_buf; + size_type const elements_in_blocks = size_type(len - l_build_buf); + + ////////////////////////////////// + // Start of merge to left step + ////////////////////////////////// + size_type l_merged = 0u; + + BOOST_ASSERT(l_build_buf); + //If there is no enough buffer for the insertion sort step, just avoid the external buffer + size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity())); + kbuf = kbuf < l_base ? 0 : kbuf; + + if(kbuf){ + //Backup internal buffer values in external buffer so they can be overwritten + xbuf.move_assign(first+l_build_buf-kbuf, kbuf); + l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op()); + + //Now combine them using the buffer. Elements from buffer can be + //overwritten since they've been saved to xbuf + l_merged = op_merge_left_step_multiple + ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, size_type(kbuf - l_merged), comp, move_op()); + + //Restore internal buffer from external buffer unless kbuf was l_build_buf, + //in that case restoration will happen later + if(kbuf != l_build_buf){ + boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks); + } + } + else{ + l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); + rotate_gcd(first_block-l_merged, first_block, first_block+elements_in_blocks); + } + + //Now combine elements using the buffer. Elements from buffer can't be + //overwritten since xbuf was not big enough, so merge swapping elements. + l_merged = op_merge_left_step_multiple + (first_block-l_merged, elements_in_blocks, l_merged, l_build_buf, size_type(l_build_buf - l_merged), comp, swap_op()); + + BOOST_ASSERT(l_merged == l_build_buf); + + ////////////////////////////////// + // Start of merge to right step + ////////////////////////////////// + + //If kbuf is l_build_buf then we can merge right without swapping + //Saved data is still in xbuf + if(kbuf && kbuf == l_build_buf){ + op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op()); + //Restore internal buffer from external buffer if kbuf was l_build_buf. + //as this operation was previously delayed. 
+ boost::move(xbuf.data(), xbuf.data() + kbuf, first); + } + else{ + op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op()); + } + xbuf.clear(); + //2*l_build_buf or total already merged + return min_value<size_type>(elements_in_blocks, size_type(2u*l_build_buf)); +} + +template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf> +void adaptive_sort_combine_blocks + ( RandItKeys const keys + , KeyCompare key_comp + , RandIt const first + , typename iter_size<RandIt>::type const len + , typename iter_size<RandIt>::type const l_prev_merged + , typename iter_size<RandIt>::type const l_block + , bool const use_buf + , bool const xbuf_used + , XBuf & xbuf + , Compare comp + , bool merge_left) +{ + boost::ignore_unused(xbuf); + typedef typename iter_size<RandIt>::type size_type; + + size_type const l_reg_combined = size_type(2u*l_prev_merged); + size_type l_irreg_combined = 0; + size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); + size_type const n_reg_combined = len/l_reg_combined; + RandIt combined_first = first; + + boost::ignore_unused(l_total_combined); + BOOST_ASSERT(l_total_combined <= len); + + size_type const max_i = size_type(n_reg_combined + (l_irreg_combined != 0)); + + if(merge_left || !use_buf) { + for( size_type combined_i = 0; combined_i != max_i; ) { + //Now merge blocks + bool const is_last = combined_i==n_reg_combined; + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + + range_xbuf<RandIt, size_type, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, key_comp, l_cur_combined + , l_prev_merged, l_block, rbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); + if(!use_buf){ + merge_blocks_bufferless + (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp); + } + else{ + merge_blocks_left + (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block); + ++combined_i; + if(combined_i != max_i) + combined_first += l_reg_combined; + } + } + else{ + combined_first += size_type(l_reg_combined*(max_i-1u)); + for( size_type combined_i = max_i; combined_i; ) { + --combined_i; + bool const is_last = combined_i==n_reg_combined; + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + + RandIt const combined_last(combined_first+l_cur_combined); + range_xbuf<RandIt, size_type, move_op> rbuf(combined_last, xbuf_used ? 
(combined_last+l_block) : combined_last); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, key_comp, l_cur_combined + , l_prev_merged, l_block, rbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); + merge_blocks_right + (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block); + if(combined_i) + combined_first -= l_reg_combined; + } + } +} + +//Returns true if buffer is placed in +//[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is +//[buffer,buffer+l_intbuf) +template<class RandIt, class Compare, class XBuf> +bool adaptive_sort_combine_all_blocks + ( RandIt keys + , typename iter_size<RandIt>::type &n_keys + , RandIt const buffer + , typename iter_size<RandIt>::type const l_buf_plus_data + , typename iter_size<RandIt>::type l_merged + , typename iter_size<RandIt>::type &l_intbuf + , XBuf & xbuf + , Compare comp) +{ + typedef typename iter_size<RandIt>::type size_type; + + RandIt const first = buffer + l_intbuf; + size_type const l_data = size_type(l_buf_plus_data - l_intbuf); + size_type const l_unique = size_type(l_intbuf + n_keys); + //Backup data to external buffer once if possible + bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); + if(common_xbuf){ + xbuf.move_assign(buffer, l_intbuf); + } + + bool prev_merge_left = true; + size_type l_prev_total_combined = l_merged, l_prev_block = 0; + bool prev_use_internal_buf = true; + + for( size_type n = 0; l_data > l_merged + ; l_merged = size_type(2u*l_merged) + , ++n){ + //If l_intbuf is non-zero, use that internal buffer. 
+ // Implies l_block == l_intbuf && use_internal_buf == true + //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer, + // Implies l_block == n_keys/2 && use_internal_buf == true + //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false) + bool use_internal_buf = false; + size_type const l_block = lblock_for_combine(l_intbuf, n_keys, size_type(2*l_merged), use_internal_buf); + BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf)); + BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) ); + BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) ); + + bool const is_merge_left = (n&1) == 0; + size_type const l_total_combined = calculate_total_combined(l_data, l_merged); + if(n && prev_use_internal_buf && prev_merge_left){ + if(is_merge_left || !use_internal_buf){ + move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); + } + else{ + //Put the buffer just after l_total_combined + RandIt const buf_end = first+l_prev_total_combined; + RandIt const buf_beg = buf_end-l_block; + if(l_prev_total_combined > l_total_combined){ + size_type const l_diff = size_type(l_prev_total_combined - l_total_combined); + move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf); + } + else if(l_prev_total_combined < l_total_combined){ + size_type const l_diff = size_type(l_total_combined - l_prev_total_combined); + move_data_forward(buf_end, l_diff, buf_beg, common_xbuf); + } + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf); + } + + //Combine to form l_merged*2 segments + if(n_keys){ + size_type upper_n_keys_this_iter = size_type(2u*l_merged/l_block); + if(upper_n_keys_this_iter > 256){ + adaptive_sort_combine_blocks + ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block + , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); + } + else{ + unsigned char uint_keys[256]; + adaptive_sort_combine_blocks + ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block + , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); + } + } + else{ + size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(); + adaptive_sort_combine_blocks + ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block + , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); + } + + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf); + prev_merge_left = is_merge_left; + l_prev_total_combined = l_total_combined; + l_prev_block = l_block; + prev_use_internal_buf = use_internal_buf; + } + BOOST_ASSERT(l_prev_total_combined == l_data); + bool const buffer_right = prev_use_internal_buf && prev_merge_left; + + l_intbuf = prev_use_internal_buf ? 
l_prev_block : 0u; + n_keys = size_type(l_unique - l_intbuf); + //Restore data from to external common buffer if used + if(common_xbuf){ + if(buffer_right){ + boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data); + } + else{ + boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer); + } + } + return buffer_right; +} + + +template<class RandIt, class Compare, class XBuf> +void adaptive_sort_final_merge( bool buffer_right + , RandIt const first + , typename iter_size<RandIt>::type const l_intbuf + , typename iter_size<RandIt>::type const n_keys + , typename iter_size<RandIt>::type const len + , XBuf & xbuf + , Compare comp) +{ + //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); + xbuf.clear(); + + typedef typename iter_size<RandIt>::type size_type; + + size_type const n_key_plus_buf = size_type(l_intbuf+n_keys); + if(buffer_right){ + //Use stable sort as some buffer elements might not be unique (see non_unique_buf) + stable_sort(first+len-l_intbuf, first+len, comp, xbuf); + stable_merge( first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf); + unstable_sort(first, first+n_keys, comp, xbuf); + stable_merge(first, first+n_keys, first+len, comp, xbuf); + } + else{ + //Use stable sort as some buffer elements might not be unique (see non_unique_buf) + stable_sort(first, first+n_key_plus_buf, comp, xbuf); + if(xbuf.capacity() >= n_key_plus_buf){ + buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf); + } + else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){ + stable_merge( first+n_keys, first+n_key_plus_buf + , first+len, comp, xbuf); + stable_merge(first, first+n_keys, first+len, comp, xbuf); + } + else{ + stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf); + } + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len); +} + +template<class RandIt, class Compare, class Unsigned, class XBuf> +bool adaptive_sort_build_params + (RandIt first, Unsigned const len, Compare comp + , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf + , XBuf & xbuf + ) +{ + typedef typename iter_size<RandIt>::type size_type; + + //Calculate ideal parameters and try to collect needed unique keys + l_base = 0u; + + //Try to find a value near sqrt(len) that is 2^N*l_base where + //l_base <= AdaptiveSortInsertionSortThreshold. This property is important + //as build_blocks merges to the left iteratively duplicating the + //merged size and all the buffer must be used just before the final + //merge to right step. This guarantees "build_blocks" produces + //segments of size l_build_buf*2, maximizing the classic merge phase. 
+ l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base)); + + //The internal buffer can be expanded if there is enough external memory + while(xbuf.capacity() >= l_intbuf*2){ + l_intbuf = size_type(2u*l_intbuf); + } + + //This is the minimum number of keys to implement the ideal algorithm + // + //l_intbuf is used as buffer plus the key count + size_type n_min_ideal_keys = size_type(l_intbuf-1u); + while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){ + --n_min_ideal_keys; + } + ++n_min_ideal_keys; + BOOST_ASSERT(n_min_ideal_keys <= l_intbuf); + + if(xbuf.template supports_aligned_trailing<size_type> + (l_intbuf, size_type((size_type(len-l_intbuf)-1u)/l_intbuf+1u))){ + n_keys = 0u; + l_build_buf = l_intbuf; + } + else{ + //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that + //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half + //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin. + // + //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed, + //(to be used for keys in combine_all_blocks) as the whole l_build_buf + //will be backuped in the buffer during build_blocks. + bool const non_unique_buf = xbuf.capacity() >= l_intbuf; + size_type const to_collect = non_unique_buf ? n_min_ideal_keys : size_type(l_intbuf*2u); + size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf); + + //If available memory is 2*sqrt(l), then for "build_params" + //the situation is the same as if 2*l_intbuf were collected. + if(non_unique_buf && collected == n_min_ideal_keys){ + l_build_buf = l_intbuf; + n_keys = n_min_ideal_keys; + } + else if(collected == 2*l_intbuf){ + //l_intbuf*2 elements found. Use all of them in the build phase + l_build_buf = size_type(l_intbuf*2); + n_keys = l_intbuf; + } + else if(collected >= (n_min_ideal_keys+l_intbuf)){ + l_build_buf = l_intbuf; + n_keys = size_type(collected - l_intbuf); + } + //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix + //is possible (due to very low unique keys), then go to a slow sort based on rotations. + else{ + BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf)); + if(collected < 4){ //No combination possible with less that 4 keys + return false; + } + n_keys = l_intbuf; + while(n_keys & (n_keys-1u)){ + n_keys &= size_type(n_keys-1u); // make it power or 2 + } + while(n_keys > collected){ + n_keys/=2; + } + //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two + l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold); + l_intbuf = 0; + l_build_buf = n_keys; + } + BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf); + } + + return true; +} + +// Main explanation of the sort algorithm. +// +// csqrtlen = ceil(sqrt(len)); +// +// * First, 2*csqrtlen unique elements elements are extracted from elements to be +// sorted and placed in the beginning of the range. +// +// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements +// will be used as auxiliary memory, so trailing len-2*csqrtlen elements are +// are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step +// 2*csqrtlen unique elements are again the leading elements of the whole range. +// +// * Step "combine_blocks": pairs of previously formed blocks are merged with a different +// ("smart") algorithm to form blocks of 8*csqrtlen elements. 
This step is slower than the +// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen +// elements, etc) of until all trailing (len-2*csqrtlen) elements are merged. +// +// In "combine_blocks" len/csqrtlen elements used are as "keys" (markers) to +// know if elements belong to the first or second block to be merged and another +// leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step: +// +// Iteratively until all trailing (len-2*csqrtlen) elements are merged: +// Iteratively for each pair of previously merged block: +// * Blocks are divided groups of csqrtlen elements and +// 2*merged_block/csqrtlen keys are sorted to be used as markers +// * Groups are selection-sorted by first or last element (depending whether they are going +// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. +// * Elements of each block pair are merged using the csqrtlen buffer taking into account +// if they belong to the first half or second half (marked by the key). +// +// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with +// rotations with the rest of sorted elements in the "combine_blocks" step. +// +// Corner cases: +// +// * If no 2*csqrtlen elements can be extracted: +// +// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used +// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This +// means that an additional "combine_blocks" step will be needed to merge all elements. +// +// * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum, +// then reduces the number of elements used as buffer and keys in the "build_blocks" +// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction +// then uses a rotation based smart merge. +// +// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed. +// +// * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used. +// +// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), +// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral +// keys to combine blocks. +// +// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks +// using classic merge and "combine_blocks" will use bigger blocks when merging. +template<class RandIt, class Compare, class XBuf> +void adaptive_sort_impl + ( RandIt first + , typename iter_size<RandIt>::type const len + , Compare comp + , XBuf & xbuf + ) +{ + typedef typename iter_size<RandIt>::type size_type; + + //Small sorts go directly to insertion sort + if(len <= size_type(AdaptiveSortInsertionSortThreshold)){ + insertion_sort(first, first + len, comp); + } + else if((len-len/2) <= xbuf.capacity()){ + merge_sort(first, first+len, comp, xbuf.data()); + } + else{ + //Make sure it is at least four + BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4); + + size_type l_base = 0; + size_type l_intbuf = 0; + size_type n_keys = 0; + size_type l_build_buf = 0; + + //Calculate and extract needed unique elements. 
If a minimum is not achieved + //fallback to a slow stable sort + if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ + stable_sort(first, first+len, comp, xbuf); + } + else{ + BOOST_ASSERT(l_build_buf); + //Otherwise, continue the adaptive_sort + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len); + size_type const n_key_plus_buf = size_type(l_intbuf+n_keys); + //l_build_buf is always power of two if l_intbuf is zero + BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1)))); + + //Classic merge sort until internal buffer and xbuf are exhausted + size_type const l_merged = adaptive_sort_build_blocks + ( first + n_key_plus_buf-l_build_buf + , size_type(len-n_key_plus_buf+l_build_buf) + , l_base, l_build_buf, xbuf, comp); + BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len); + + //Non-trivial merge + bool const buffer_right = adaptive_sort_combine_all_blocks + (first, n_keys, first+n_keys, size_type(len-n_keys), l_merged, l_intbuf, xbuf, comp); + + //Sort keys and buffer and merge the whole sequence + adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp); + } + } +} + +} //namespace detail_adaptive { + +///@endcond + +//! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according +//! to comparison functor "comp". The sort is stable (order of equal elements +//! is guaranteed to be preserved). Performance is improved if additional raw storage is +//! provided. +//! +//! <b>Requires</b>: +//! - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator. +//! - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible. +//! +//! <b>Parameters</b>: +//! - first, last: the range of elements to sort +//! - comp: comparison function object which returns true if the first argument is is ordered before the second. +//! - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len" +//! elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len +//! is ceil(std::distance(first, last)/2). +//! +//! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type +//! of dereferenced RandIt throws. +//! +//! <b>Complexity</b>: Always K x O(Nxlog(N)) comparisons and move assignments/constructors/swaps. +//! Comparisons are close to minimum even with no additional memory. Constant factor for data movement is minimized +//! when uninitialized_len is ceil(std::distance(first, last)/2). Pretty good enough performance is achieved when +//! ceil(sqrt(std::distance(first, last)))*2. +//! +//! <b>Caution</b>: Experimental implementation, not production-ready. 
+template<class RandIt, class RandRawIt, class Compare> +void adaptive_sort( RandIt first, RandIt last, Compare comp + , RandRawIt uninitialized + , typename iter_size<RandIt>::type uninitialized_len) +{ + typedef typename iter_size<RandIt>::type size_type; + typedef typename iterator_traits<RandIt>::value_type value_type; + + ::boost::movelib::adaptive_xbuf<value_type, RandRawIt, size_type> xbuf(uninitialized, uninitialized_len); + ::boost::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf); +} + +template<class RandIt, class Compare> +void adaptive_sort( RandIt first, RandIt last, Compare comp) +{ + typedef typename iterator_traits<RandIt>::value_type value_type; + adaptive_sort(first, last, comp, (value_type*)0, 0u); +} + +} //namespace movelib { +} //namespace boost { + +#include <boost/move/detail/config_end.hpp> + +#if defined(BOOST_CLANG) || (defined(BOOST_GCC) && (BOOST_GCC >= 40600)) +#pragma GCC diagnostic pop +#endif + +#endif //#define BOOST_MOVE_ADAPTIVE_SORT_HPP diff --git a/contrib/restricted/boost/move/include/boost/move/algo/detail/pdqsort.hpp b/contrib/restricted/boost/move/include/boost/move/algo/detail/pdqsort.hpp new file mode 100644 index 0000000000..640f8a3d1b --- /dev/null +++ b/contrib/restricted/boost/move/include/boost/move/algo/detail/pdqsort.hpp @@ -0,0 +1,344 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Orson Peters 2017. +// (C) Copyright Ion Gaztanaga 2017-2018. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// +// +// This implementation of Pattern-defeating quicksort (pdqsort) was written +// by Orson Peters, and discussed in the Boost mailing list: +// http://boost.2283326.n4.nabble.com/sort-pdqsort-td4691031.html +// +// This implementation is the adaptation by Ion Gaztanaga of code originally in GitHub +// with permission from the author to relicense it under the Boost Software License +// (see the Boost mailing list for details). +// +// The original copyright statement is pasted here for completeness: +// +// pdqsort.h - Pattern-defeating quicksort. +// Copyright (c) 2015 Orson Peters +// This software is provided 'as-is', without any express or implied warranty. In no event will the +// authors be held liable for any damages arising from the use of this software. +// Permission is granted to anyone to use this software for any purpose, including commercial +// applications, and to alter it and redistribute it freely, subject to the following restrictions: +// 1. The origin of this software must not be misrepresented; you must not claim that you wrote the +// original software. If you use this software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be misrepresented as +// being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
+// +////////////////////////////////////////////////////////////////////////////// + +#ifndef BOOST_MOVE_ALGO_PDQSORT_HPP +#define BOOST_MOVE_ALGO_PDQSORT_HPP + +#ifndef BOOST_CONFIG_HPP +# include <boost/config.hpp> +#endif +# +#if defined(BOOST_HAS_PRAGMA_ONCE) +# pragma once +#endif + +#include <boost/move/detail/config_begin.hpp> + +#include <boost/move/detail/workaround.hpp> +#include <boost/move/utility_core.hpp> +#include <boost/move/algo/detail/insertion_sort.hpp> +#include <boost/move/algo/detail/heap_sort.hpp> +#include <boost/move/detail/iterator_traits.hpp> + +#include <boost/move/adl_move_swap.hpp> +#include <cstddef> + +#if defined(BOOST_CLANG) || (defined(BOOST_GCC) && (BOOST_GCC >= 40600)) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + +namespace boost { +namespace movelib { + +namespace pdqsort_detail { + + //A simple pair implementation to avoid including <utility> + template<class T1, class T2> + struct pair + { + pair() + {} + + pair(const T1 &t1, const T2 &t2) + : first(t1), second(t2) + {} + + T1 first; + T2 second; + }; + + enum { + // Partitions below this size are sorted using insertion sort. + insertion_sort_threshold = 24, + + // Partitions above this size use Tukey's ninther to select the pivot. + ninther_threshold = 128, + + // When we detect an already sorted partition, attempt an insertion sort that allows this + // amount of element moves before giving up. + partial_insertion_sort_limit = 8, + + // Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char. + block_size = 64, + + // Cacheline size, assumes power of two. + cacheline_size = 64 + + }; + + // Returns floor(log2(n)), assumes n > 0. + template<class Unsigned> + Unsigned log2(Unsigned n) { + Unsigned log = 0; + while (n >>= 1) ++log; + return log; + } + + // Attempts to use insertion sort on [begin, end). Will return false if more than + // partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will + // successfully sort and return true. + template<class Iter, class Compare> + inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) { + typedef typename boost::movelib::iterator_traits<Iter>::value_type T; + typedef typename boost::movelib:: iter_size<Iter>::type size_type; + if (begin == end) return true; + + size_type limit = 0; + for (Iter cur = begin + 1; cur != end; ++cur) { + if (limit > partial_insertion_sort_limit) return false; + + Iter sift = cur; + Iter sift_1 = cur - 1; + + // Compare first so we can avoid 2 moves for an element already positioned correctly. + if (comp(*sift, *sift_1)) { + T tmp = boost::move(*sift); + + do { *sift-- = boost::move(*sift_1); } + while (sift != begin && comp(tmp, *--sift_1)); + + *sift = boost::move(tmp); + limit += size_type(cur - sift); + } + } + + return true; + } + + template<class Iter, class Compare> + inline void sort2(Iter a, Iter b, Compare comp) { + if (comp(*b, *a)) boost::adl_move_iter_swap(a, b); + } + + // Sorts the elements *a, *b and *c using comparison function comp. + template<class Iter, class Compare> + inline void sort3(Iter a, Iter b, Iter c, Compare comp) { + sort2(a, b, comp); + sort2(b, c, comp); + sort2(a, b, comp); + } + + // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal + // to the pivot are put in the right-hand partition. Returns the position of the pivot after + // partitioning and whether the passed sequence already was correctly partitioned. 
Assumes the + // pivot is a median of at least 3 elements and that [begin, end) is at least + // insertion_sort_threshold long. + template<class Iter, class Compare> + pdqsort_detail::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) { + typedef typename boost::movelib::iterator_traits<Iter>::value_type T; + + // Move pivot into local for speed. + T pivot(boost::move(*begin)); + + Iter first = begin; + Iter last = end; + + // Find the first element greater than or equal than the pivot (the median of 3 guarantees + // this exists). + while (comp(*++first, pivot)); + + // Find the first element strictly smaller than the pivot. We have to guard this search if + // there was no element before *first. + if (first - 1 == begin) while (first < last && !comp(*--last, pivot)); + else while ( !comp(*--last, pivot)); + + // If the first pair of elements that should be swapped to partition are the same element, + // the passed in sequence already was correctly partitioned. + bool already_partitioned = first >= last; + + // Keep swapping pairs of elements that are on the wrong side of the pivot. Previously + // swapped pairs guard the searches, which is why the first iteration is special-cased + // above. + while (first < last) { + boost::adl_move_iter_swap(first, last); + while (comp(*++first, pivot)); + while (!comp(*--last, pivot)); + } + + // Put the pivot in the right place. + Iter pivot_pos = first - 1; + *begin = boost::move(*pivot_pos); + *pivot_pos = boost::move(pivot); + + return pdqsort_detail::pair<Iter, bool>(pivot_pos, already_partitioned); + } + + // Similar function to the one above, except elements equal to the pivot are put to the left of + // the pivot and it doesn't check or return if the passed sequence already was partitioned. + // Since this is rarely used (the many equal case), and in that case pdqsort already has O(n) + // performance, no block quicksort is applied here for simplicity. + template<class Iter, class Compare> + inline Iter partition_left(Iter begin, Iter end, Compare comp) { + typedef typename boost::movelib::iterator_traits<Iter>::value_type T; + + T pivot(boost::move(*begin)); + Iter first = begin; + Iter last = end; + + while (comp(pivot, *--last)); + + if (last + 1 == end) while (first < last && !comp(pivot, *++first)); + else while ( !comp(pivot, *++first)); + + while (first < last) { + boost::adl_move_iter_swap(first, last); + while (comp(pivot, *--last)); + while (!comp(pivot, *++first)); + } + + Iter pivot_pos = last; + *begin = boost::move(*pivot_pos); + *pivot_pos = boost::move(pivot); + + return pivot_pos; + } + + + template<class Iter, class Compare> + void pdqsort_loop( Iter begin, Iter end, Compare comp + , typename boost::movelib:: iter_size<Iter>::type bad_allowed + , bool leftmost = true) + { + typedef typename boost::movelib:: iter_size<Iter>::type size_type; + + // Use a while loop for tail recursion elimination. + while (true) { + size_type size = size_type(end - begin); + + // Insertion sort is faster for small arrays. + if (size < insertion_sort_threshold) { + insertion_sort(begin, end, comp); + return; + } + + // Choose pivot as median of 3 or pseudomedian of 9. 
+ size_type s2 = size / 2; + if (size > ninther_threshold) { + sort3(begin, begin + s2, end - 1, comp); + sort3(begin + 1, begin + (s2 - 1), end - 2, comp); + sort3(begin + 2, begin + (s2 + 1), end - 3, comp); + sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp); + boost::adl_move_iter_swap(begin, begin + s2); + } else sort3(begin + s2, begin, end - 1, comp); + + // If *(begin - 1) is the end of the right partition of a previous partition operation + // there is no element in [begin, end) that is smaller than *(begin - 1). Then if our + // pivot compares equal to *(begin - 1) we change strategy, putting equal elements in + // the left partition, greater elements in the right partition. We do not have to + // recurse on the left partition, since it's sorted (all equal). + if (!leftmost && !comp(*(begin - 1), *begin)) { + begin = partition_left(begin, end, comp) + 1; + continue; + } + + // Partition and get results. + pdqsort_detail::pair<Iter, bool> part_result = partition_right(begin, end, comp); + Iter pivot_pos = part_result.first; + bool already_partitioned = part_result.second; + + // Check for a highly unbalanced partition. + size_type l_size = size_type(pivot_pos - begin); + size_type r_size = size_type(end - (pivot_pos + 1)); + bool highly_unbalanced = l_size < size / 8 || r_size < size / 8; + + // If we got a highly unbalanced partition we shuffle elements to break many patterns. + if (highly_unbalanced) { + // If we had too many bad partitions, switch to heapsort to guarantee O(n log n). + if (--bad_allowed == 0) { + boost::movelib::heap_sort(begin, end, comp); + return; + } + + if (l_size >= insertion_sort_threshold) { + boost::adl_move_iter_swap(begin, begin + l_size / 4); + boost::adl_move_iter_swap(pivot_pos - 1, pivot_pos - l_size / 4); + + if (l_size > ninther_threshold) { + boost::adl_move_iter_swap(begin + 1, begin + (l_size / 4 + 1)); + boost::adl_move_iter_swap(begin + 2, begin + (l_size / 4 + 2)); + boost::adl_move_iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1)); + boost::adl_move_iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2)); + } + } + + if (r_size >= insertion_sort_threshold) { + boost::adl_move_iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)); + boost::adl_move_iter_swap(end - 1, end - r_size / 4); + + if (r_size > ninther_threshold) { + boost::adl_move_iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)); + boost::adl_move_iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)); + boost::adl_move_iter_swap(end - 2, end - (1 + r_size / 4)); + boost::adl_move_iter_swap(end - 3, end - (2 + r_size / 4)); + } + } + } else { + // If we were decently balanced and we tried to sort an already partitioned + // sequence try to use insertion sort. + if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp) + && partial_insertion_sort(pivot_pos + 1, end, comp)) return; + } + + // Sort the left partition first using recursion and do tail recursion elimination for + // the right-hand partition. 
+ pdqsort_loop<Iter, Compare>(begin, pivot_pos, comp, bad_allowed, leftmost); + begin = pivot_pos + 1; + leftmost = false; + } + } +} + + +template<class Iter, class Compare> +void pdqsort(Iter begin, Iter end, Compare comp) +{ + if (begin == end) return; + typedef typename boost::movelib:: iter_size<Iter>::type size_type; + pdqsort_detail::pdqsort_loop<Iter, Compare>(begin, end, comp, pdqsort_detail::log2(size_type(end - begin))); +} + +} //namespace movelib { +} //namespace boost { + +#if defined(BOOST_CLANG) || (defined(BOOST_GCC) && (BOOST_GCC >= 40600)) +#pragma GCC diagnostic pop +#endif + +#include <boost/move/detail/config_end.hpp> + +#endif //BOOST_MOVE_ALGO_PDQSORT_HPP |
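
For completeness, a similarly hedged sketch of the boost::movelib::pdqsort entry point added in the detail header above. Since it lives under detail/, calling it directly as below is only for illustration; the comparator and the input data are made up for the example. Inputs with many equal keys or already-sorted runs are exactly the patterns this implementation is written to detect (see partition_left and partial_insertion_sort in the diff).

#include <boost/move/algo/detail/pdqsort.hpp>

#include <cstdio>
#include <vector>

struct less_int
{
    bool operator()(int a, int b) const { return a < b; }   // strict weak ordering
};

int main()
{
    // Many equal keys followed by a sorted run: adversarial for plain quicksort,
    // handled in near-linear time by pdqsort.
    std::vector<int> v(1000, 7);
    for (int i = 0; i < 200; ++i)
        v.push_back(i);

    boost::movelib::pdqsort(v.begin(), v.end(), less_int());

    std::printf("front=%d back=%d size=%zu\n", v.front(), v.back(), v.size());
    return 0;
}
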