path: root/contrib/libs/llvm12/include/llvm/Support/Parallel.h
author shadchin <shadchin@yandex-team.ru> 2022-02-10 16:44:30 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:30 +0300
commit 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/include/llvm/Support/Parallel.h
parent 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/include/llvm/Support/Parallel.h')
-rw-r--r-- contrib/libs/llvm12/include/llvm/Support/Parallel.h | 196
1 file changed, 98 insertions, 98 deletions
diff --git a/contrib/libs/llvm12/include/llvm/Support/Parallel.h b/contrib/libs/llvm12/include/llvm/Support/Parallel.h
index ac053bb4cc..4adcf04c2f 100644
--- a/contrib/libs/llvm12/include/llvm/Support/Parallel.h
+++ b/contrib/libs/llvm12/include/llvm/Support/Parallel.h
@@ -18,7 +18,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Error.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Threading.h"
@@ -128,17 +128,17 @@ void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
llvm::Log2_64(std::distance(Start, End)) + 1);
}
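The last argument to the recursive sort above, llvm::Log2_64(std::distance(Start, End)) + 1, appears to be a recursion-depth budget: it bounds how many levels of splitting may spawn parallel subtasks before the sort falls back to a sequential path. A minimal usage sketch of the public parallelSort wrapper that this header exposes around detail::parallel_sort (the vector and values are illustrative, and the wrapper name is assumed from the LLVM API rather than shown in this hunk):

#include "llvm/Support/Parallel.h"
#include <vector>

void sortExample() {
  std::vector<int> V = {5, 3, 9, 1, 7};
  // Runs the parallel quick sort when threading is enabled; with a
  // single-thread strategy it behaves like an ordinary sort.
  llvm::parallelSort(V.begin(), V.end());
}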
-// TaskGroup has a relatively high overhead, so we want to reduce
-// the number of spawn() calls. We'll create up to 1024 tasks here.
-// (Note that 1024 is an arbitrary number. This code probably needs
-// improving to take the number of available cores into account.)
-enum { MaxTasksPerGroup = 1024 };
-
+// TaskGroup has a relatively high overhead, so we want to reduce
+// the number of spawn() calls. We'll create up to 1024 tasks here.
+// (Note that 1024 is an arbitrary number. This code probably needs
+// improving to take the number of available cores into account.)
+enum { MaxTasksPerGroup = 1024 };
+
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
- // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
- // overhead on large inputs.
- ptrdiff_t TaskSize = std::distance(Begin, End) / MaxTasksPerGroup;
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ ptrdiff_t TaskSize = std::distance(Begin, End) / MaxTasksPerGroup;
if (TaskSize == 0)
TaskSize = 1;
@@ -152,9 +152,9 @@ void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
template <class IndexTy, class FuncTy>
void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
- // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
- // overhead on large inputs.
- ptrdiff_t TaskSize = (End - Begin) / MaxTasksPerGroup;
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ ptrdiff_t TaskSize = (End - Begin) / MaxTasksPerGroup;
if (TaskSize == 0)
TaskSize = 1;
@@ -170,50 +170,50 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
Fn(J);
}
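Both for-each variants above chunk the input so that each spawn() handles roughly std::distance(Begin, End) / MaxTasksPerGroup elements instead of a single one, which amortizes the TaskGroup overhead described in the comment. A minimal usage sketch of the public parallelForEach range wrapper declared later in this header (the data and lambda are illustrative):

#include "llvm/Support/Parallel.h"
#include <vector>

void squareAll(std::vector<int> &Values) {
  // Each task mutates a disjoint chunk of Values, so no locking is needed.
  llvm::parallelForEach(Values, [](int &X) { X = X * X; });
}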
-template <class IterTy, class ResultTy, class ReduceFuncTy,
- class TransformFuncTy>
-ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init,
- ReduceFuncTy Reduce,
- TransformFuncTy Transform) {
- // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
- // overhead on large inputs.
- size_t NumInputs = std::distance(Begin, End);
- if (NumInputs == 0)
- return std::move(Init);
- size_t NumTasks = std::min(static_cast<size_t>(MaxTasksPerGroup), NumInputs);
- std::vector<ResultTy> Results(NumTasks, Init);
- {
- // Each task processes either TaskSize or TaskSize+1 inputs. Any inputs
- // remaining after dividing them equally amongst tasks are distributed as
- // one extra input over the first tasks.
- TaskGroup TG;
- size_t TaskSize = NumInputs / NumTasks;
- size_t RemainingInputs = NumInputs % NumTasks;
- IterTy TBegin = Begin;
- for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) {
- IterTy TEnd = TBegin + TaskSize + (TaskId < RemainingInputs ? 1 : 0);
- TG.spawn([=, &Transform, &Reduce, &Results] {
- // Reduce the result of transformation eagerly within each task.
- ResultTy R = Init;
- for (IterTy It = TBegin; It != TEnd; ++It)
- R = Reduce(R, Transform(*It));
- Results[TaskId] = R;
- });
- TBegin = TEnd;
- }
- assert(TBegin == End);
- }
-
- // Do a final reduction. There are at most 1024 tasks, so this only adds
- // constant single-threaded overhead for large inputs. Hopefully most
- // reductions are cheaper than the transformation.
- ResultTy FinalResult = std::move(Results.front());
- for (ResultTy &PartialResult :
- makeMutableArrayRef(Results.data() + 1, Results.size() - 1))
- FinalResult = Reduce(FinalResult, std::move(PartialResult));
- return std::move(FinalResult);
-}
-
+template <class IterTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ size_t NumInputs = std::distance(Begin, End);
+ if (NumInputs == 0)
+ return std::move(Init);
+ size_t NumTasks = std::min(static_cast<size_t>(MaxTasksPerGroup), NumInputs);
+ std::vector<ResultTy> Results(NumTasks, Init);
+ {
+ // Each task processes either TaskSize or TaskSize+1 inputs. Any inputs
+ // remaining after dividing them equally amongst tasks are distributed as
+ // one extra input over the first tasks.
+ TaskGroup TG;
+ size_t TaskSize = NumInputs / NumTasks;
+ size_t RemainingInputs = NumInputs % NumTasks;
+ IterTy TBegin = Begin;
+ for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) {
+ IterTy TEnd = TBegin + TaskSize + (TaskId < RemainingInputs ? 1 : 0);
+ TG.spawn([=, &Transform, &Reduce, &Results] {
+ // Reduce the result of transformation eagerly within each task.
+ ResultTy R = Init;
+ for (IterTy It = TBegin; It != TEnd; ++It)
+ R = Reduce(R, Transform(*It));
+ Results[TaskId] = R;
+ });
+ TBegin = TEnd;
+ }
+ assert(TBegin == End);
+ }
+
+ // Do a final reduction. There are at most 1024 tasks, so this only adds
+ // constant single-threaded overhead for large inputs. Hopefully most
+ // reductions are cheaper than the transformation.
+ ResultTy FinalResult = std::move(Results.front());
+ for (ResultTy &PartialResult :
+ makeMutableArrayRef(Results.data() + 1, Results.size() - 1))
+ FinalResult = Reduce(FinalResult, std::move(PartialResult));
+ return std::move(FinalResult);
+}
+
#endif
} // namespace detail
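The partitioning in parallel_transform_reduce is worth spelling out: with NumTasks = min(1024, NumInputs), every task gets TaskSize = NumInputs / NumTasks elements and the first NumInputs % NumTasks tasks get one extra, so the chunks tile the range exactly (hence the assert(TBegin == End)). A small standalone sketch, not part of the header, that reproduces the same split over index ranges (names are illustrative):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Returns the half-open [begin, end) index pairs produced by the same
// TaskSize / RemainingInputs arithmetic used above.
std::vector<std::pair<size_t, size_t>> splitIntoTasks(size_t NumInputs,
                                                      size_t MaxTasks = 1024) {
  std::vector<std::pair<size_t, size_t>> Chunks;
  if (NumInputs == 0)
    return Chunks;
  size_t NumTasks = std::min(MaxTasks, NumInputs);
  size_t TaskSize = NumInputs / NumTasks;
  size_t RemainingInputs = NumInputs % NumTasks;
  size_t Begin = 0;
  for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) {
    size_t End = Begin + TaskSize + (TaskId < RemainingInputs ? 1 : 0);
    Chunks.push_back({Begin, End});
    Begin = End;
  }
  assert(Begin == NumInputs && "chunks must tile the whole range");
  return Chunks;
}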
@@ -256,22 +256,22 @@ void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) {
Fn(I);
}
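parallelForEachN is the index-based counterpart: it hands every size_t in [Begin, End) to Fn, which is convenient when the body needs the index to write into a separate output buffer. A minimal usage sketch (the buffers and the doubling transform are illustrative):

#include "llvm/Support/Parallel.h"
#include <vector>

void doubleInto(const std::vector<int> &In, std::vector<int> &Out) {
  Out.resize(In.size());
  // Each index is handled by exactly one task, so the writes do not race.
  llvm::parallelForEachN(0, In.size(), [&](size_t I) { Out[I] = 2 * In[I]; });
}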
-template <class IterTy, class ResultTy, class ReduceFuncTy,
- class TransformFuncTy>
-ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init,
- ReduceFuncTy Reduce,
- TransformFuncTy Transform) {
-#if LLVM_ENABLE_THREADS
- if (parallel::strategy.ThreadsRequested != 1) {
- return parallel::detail::parallel_transform_reduce(Begin, End, Init, Reduce,
- Transform);
- }
-#endif
- for (IterTy I = Begin; I != End; ++I)
- Init = Reduce(std::move(Init), Transform(*I));
- return std::move(Init);
-}
-
+template <class IterTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+#if LLVM_ENABLE_THREADS
+ if (parallel::strategy.ThreadsRequested != 1) {
+ return parallel::detail::parallel_transform_reduce(Begin, End, Init, Reduce,
+ Transform);
+ }
+#endif
+ for (IterTy I = Begin; I != End; ++I)
+ Init = Reduce(std::move(Init), Transform(*I));
+ return std::move(Init);
+}
+
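A minimal usage sketch of the wrapper above: summing the squares of a vector, where the transform runs in parallel over chunks and the per-task partial sums are folded together on the calling thread (the data and lambdas are illustrative):

#include "llvm/Support/Parallel.h"
#include <cstdint>
#include <vector>

int64_t sumOfSquares(const std::vector<int> &Values) {
  // Init must be copyable: each task seeds its partial result from a copy.
  return llvm::parallelTransformReduce(
      Values.begin(), Values.end(), int64_t(0),
      /*Reduce=*/[](int64_t A, int64_t B) { return A + B; },
      /*Transform=*/[](int X) { return int64_t(X) * X; });
}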
// Range wrappers.
template <class RangeTy,
class Comparator = std::less<decltype(*std::begin(RangeTy()))>>
@@ -284,31 +284,31 @@ void parallelForEach(RangeTy &&R, FuncTy Fn) {
parallelForEach(std::begin(R), std::end(R), Fn);
}
-template <class RangeTy, class ResultTy, class ReduceFuncTy,
- class TransformFuncTy>
-ResultTy parallelTransformReduce(RangeTy &&R, ResultTy Init,
- ReduceFuncTy Reduce,
- TransformFuncTy Transform) {
- return parallelTransformReduce(std::begin(R), std::end(R), Init, Reduce,
- Transform);
-}
-
-// Parallel for-each, but with error handling.
-template <class RangeTy, class FuncTy>
-Error parallelForEachError(RangeTy &&R, FuncTy Fn) {
- // The transform_reduce algorithm requires that the initial value be copyable.
- // Error objects are uncopyable. We only need to copy initial success values,
- // so work around this mismatch via the C API. The C API represents success
- // values with a null pointer. The joinErrors discards null values and joins
- // multiple errors into an ErrorList.
- return unwrap(parallelTransformReduce(
- std::begin(R), std::end(R), wrap(Error::success()),
- [](LLVMErrorRef Lhs, LLVMErrorRef Rhs) {
- return wrap(joinErrors(unwrap(Lhs), unwrap(Rhs)));
- },
- [&Fn](auto &&V) { return wrap(Fn(V)); }));
-}
-
+template <class RangeTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallelTransformReduce(RangeTy &&R, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+ return parallelTransformReduce(std::begin(R), std::end(R), Init, Reduce,
+ Transform);
+}
+
+// Parallel for-each, but with error handling.
+template <class RangeTy, class FuncTy>
+Error parallelForEachError(RangeTy &&R, FuncTy Fn) {
+ // The transform_reduce algorithm requires that the initial value be copyable.
+ // Error objects are uncopyable. We only need to copy initial success values,
+ // so work around this mismatch via the C API. The C API represents success
+ // values with a null pointer. The joinErrors discards null values and joins
+ // multiple errors into an ErrorList.
+ return unwrap(parallelTransformReduce(
+ std::begin(R), std::end(R), wrap(Error::success()),
+ [](LLVMErrorRef Lhs, LLVMErrorRef Rhs) {
+ return wrap(joinErrors(unwrap(Lhs), unwrap(Rhs)));
+ },
+ [&Fn](auto &&V) { return wrap(Fn(V)); }));
+}
+
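A minimal usage sketch of parallelForEachError: each callback returns an Error, failures from all elements are joined into one ErrorList, and overall success comes back as Error::success() (the path check and error message are illustrative):

#include "llvm/Support/Error.h"
#include "llvm/Support/Parallel.h"
#include <string>
#include <vector>

llvm::Error checkAll(const std::vector<std::string> &Paths) {
  return llvm::parallelForEachError(Paths, [](const std::string &P) -> llvm::Error {
    if (P.empty())
      return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                     "empty input path");
    return llvm::Error::success();
  });
}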
} // namespace llvm
#endif // LLVM_SUPPORT_PARALLEL_H