diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/include/llvm/Support/Parallel.h | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/include/llvm/Support/Parallel.h')
-rw-r--r-- | contrib/libs/llvm12/include/llvm/Support/Parallel.h | 196 |
1 files changed, 98 insertions, 98 deletions
diff --git a/contrib/libs/llvm12/include/llvm/Support/Parallel.h b/contrib/libs/llvm12/include/llvm/Support/Parallel.h index ac053bb4cc..4adcf04c2f 100644 --- a/contrib/libs/llvm12/include/llvm/Support/Parallel.h +++ b/contrib/libs/llvm12/include/llvm/Support/Parallel.h @@ -18,7 +18,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Config/llvm-config.h" -#include "llvm/Support/Error.h" +#include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Threading.h" @@ -128,17 +128,17 @@ void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, llvm::Log2_64(std::distance(Start, End)) + 1); } -// TaskGroup has a relatively high overhead, so we want to reduce -// the number of spawn() calls. We'll create up to 1024 tasks here. -// (Note that 1024 is an arbitrary number. This code probably needs -// improving to take the number of available cores into account.) -enum { MaxTasksPerGroup = 1024 }; - +// TaskGroup has a relatively high overhead, so we want to reduce +// the number of spawn() calls. We'll create up to 1024 tasks here. +// (Note that 1024 is an arbitrary number. This code probably needs +// improving to take the number of available cores into account.) +enum { MaxTasksPerGroup = 1024 }; + template <class IterTy, class FuncTy> void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling - // overhead on large inputs. - ptrdiff_t TaskSize = std::distance(Begin, End) / MaxTasksPerGroup; + // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling + // overhead on large inputs. + ptrdiff_t TaskSize = std::distance(Begin, End) / MaxTasksPerGroup; if (TaskSize == 0) TaskSize = 1; @@ -152,9 +152,9 @@ void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { template <class IndexTy, class FuncTy> void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { - // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling - // overhead on large inputs. - ptrdiff_t TaskSize = (End - Begin) / MaxTasksPerGroup; + // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling + // overhead on large inputs. + ptrdiff_t TaskSize = (End - Begin) / MaxTasksPerGroup; if (TaskSize == 0) TaskSize = 1; @@ -170,50 +170,50 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { Fn(J); } -template <class IterTy, class ResultTy, class ReduceFuncTy, - class TransformFuncTy> -ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init, - ReduceFuncTy Reduce, - TransformFuncTy Transform) { - // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling - // overhead on large inputs. - size_t NumInputs = std::distance(Begin, End); - if (NumInputs == 0) - return std::move(Init); - size_t NumTasks = std::min(static_cast<size_t>(MaxTasksPerGroup), NumInputs); - std::vector<ResultTy> Results(NumTasks, Init); - { - // Each task processes either TaskSize or TaskSize+1 inputs. Any inputs - // remaining after dividing them equally amongst tasks are distributed as - // one extra input over the first tasks. - TaskGroup TG; - size_t TaskSize = NumInputs / NumTasks; - size_t RemainingInputs = NumInputs % NumTasks; - IterTy TBegin = Begin; - for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) { - IterTy TEnd = TBegin + TaskSize + (TaskId < RemainingInputs ? 1 : 0); - TG.spawn([=, &Transform, &Reduce, &Results] { - // Reduce the result of transformation eagerly within each task. - ResultTy R = Init; - for (IterTy It = TBegin; It != TEnd; ++It) - R = Reduce(R, Transform(*It)); - Results[TaskId] = R; - }); - TBegin = TEnd; - } - assert(TBegin == End); - } - - // Do a final reduction. There are at most 1024 tasks, so this only adds - // constant single-threaded overhead for large inputs. Hopefully most - // reductions are cheaper than the transformation. - ResultTy FinalResult = std::move(Results.front()); - for (ResultTy &PartialResult : - makeMutableArrayRef(Results.data() + 1, Results.size() - 1)) - FinalResult = Reduce(FinalResult, std::move(PartialResult)); - return std::move(FinalResult); -} - +template <class IterTy, class ResultTy, class ReduceFuncTy, + class TransformFuncTy> +ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init, + ReduceFuncTy Reduce, + TransformFuncTy Transform) { + // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling + // overhead on large inputs. + size_t NumInputs = std::distance(Begin, End); + if (NumInputs == 0) + return std::move(Init); + size_t NumTasks = std::min(static_cast<size_t>(MaxTasksPerGroup), NumInputs); + std::vector<ResultTy> Results(NumTasks, Init); + { + // Each task processes either TaskSize or TaskSize+1 inputs. Any inputs + // remaining after dividing them equally amongst tasks are distributed as + // one extra input over the first tasks. + TaskGroup TG; + size_t TaskSize = NumInputs / NumTasks; + size_t RemainingInputs = NumInputs % NumTasks; + IterTy TBegin = Begin; + for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) { + IterTy TEnd = TBegin + TaskSize + (TaskId < RemainingInputs ? 1 : 0); + TG.spawn([=, &Transform, &Reduce, &Results] { + // Reduce the result of transformation eagerly within each task. + ResultTy R = Init; + for (IterTy It = TBegin; It != TEnd; ++It) + R = Reduce(R, Transform(*It)); + Results[TaskId] = R; + }); + TBegin = TEnd; + } + assert(TBegin == End); + } + + // Do a final reduction. There are at most 1024 tasks, so this only adds + // constant single-threaded overhead for large inputs. Hopefully most + // reductions are cheaper than the transformation. + ResultTy FinalResult = std::move(Results.front()); + for (ResultTy &PartialResult : + makeMutableArrayRef(Results.data() + 1, Results.size() - 1)) + FinalResult = Reduce(FinalResult, std::move(PartialResult)); + return std::move(FinalResult); +} + #endif } // namespace detail @@ -256,22 +256,22 @@ void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) { Fn(I); } -template <class IterTy, class ResultTy, class ReduceFuncTy, - class TransformFuncTy> -ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init, - ReduceFuncTy Reduce, - TransformFuncTy Transform) { -#if LLVM_ENABLE_THREADS - if (parallel::strategy.ThreadsRequested != 1) { - return parallel::detail::parallel_transform_reduce(Begin, End, Init, Reduce, - Transform); - } -#endif - for (IterTy I = Begin; I != End; ++I) - Init = Reduce(std::move(Init), Transform(*I)); - return std::move(Init); -} - +template <class IterTy, class ResultTy, class ReduceFuncTy, + class TransformFuncTy> +ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init, + ReduceFuncTy Reduce, + TransformFuncTy Transform) { +#if LLVM_ENABLE_THREADS + if (parallel::strategy.ThreadsRequested != 1) { + return parallel::detail::parallel_transform_reduce(Begin, End, Init, Reduce, + Transform); + } +#endif + for (IterTy I = Begin; I != End; ++I) + Init = Reduce(std::move(Init), Transform(*I)); + return std::move(Init); +} + // Range wrappers. template <class RangeTy, class Comparator = std::less<decltype(*std::begin(RangeTy()))>> @@ -284,31 +284,31 @@ void parallelForEach(RangeTy &&R, FuncTy Fn) { parallelForEach(std::begin(R), std::end(R), Fn); } -template <class RangeTy, class ResultTy, class ReduceFuncTy, - class TransformFuncTy> -ResultTy parallelTransformReduce(RangeTy &&R, ResultTy Init, - ReduceFuncTy Reduce, - TransformFuncTy Transform) { - return parallelTransformReduce(std::begin(R), std::end(R), Init, Reduce, - Transform); -} - -// Parallel for-each, but with error handling. -template <class RangeTy, class FuncTy> -Error parallelForEachError(RangeTy &&R, FuncTy Fn) { - // The transform_reduce algorithm requires that the initial value be copyable. - // Error objects are uncopyable. We only need to copy initial success values, - // so work around this mismatch via the C API. The C API represents success - // values with a null pointer. The joinErrors discards null values and joins - // multiple errors into an ErrorList. - return unwrap(parallelTransformReduce( - std::begin(R), std::end(R), wrap(Error::success()), - [](LLVMErrorRef Lhs, LLVMErrorRef Rhs) { - return wrap(joinErrors(unwrap(Lhs), unwrap(Rhs))); - }, - [&Fn](auto &&V) { return wrap(Fn(V)); })); -} - +template <class RangeTy, class ResultTy, class ReduceFuncTy, + class TransformFuncTy> +ResultTy parallelTransformReduce(RangeTy &&R, ResultTy Init, + ReduceFuncTy Reduce, + TransformFuncTy Transform) { + return parallelTransformReduce(std::begin(R), std::end(R), Init, Reduce, + Transform); +} + +// Parallel for-each, but with error handling. +template <class RangeTy, class FuncTy> +Error parallelForEachError(RangeTy &&R, FuncTy Fn) { + // The transform_reduce algorithm requires that the initial value be copyable. + // Error objects are uncopyable. We only need to copy initial success values, + // so work around this mismatch via the C API. The C API represents success + // values with a null pointer. The joinErrors discards null values and joins + // multiple errors into an ErrorList. + return unwrap(parallelTransformReduce( + std::begin(R), std::end(R), wrap(Error::success()), + [](LLVMErrorRef Lhs, LLVMErrorRef Rhs) { + return wrap(joinErrors(unwrap(Lhs), unwrap(Rhs))); + }, + [&Fn](auto &&V) { return wrap(Fn(V)); })); +} + } // namespace llvm #endif // LLVM_SUPPORT_PARALLEL_H |