path: root/contrib/libs/llvm12/include/llvm/Support/Parallel.h
author shadchin <shadchin@yandex-team.ru> 2022-02-10 16:44:30 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:30 +0300
commit 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/include/llvm/Support/Parallel.h
parent 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/include/llvm/Support/Parallel.h')
-rw-r--r-- contrib/libs/llvm12/include/llvm/Support/Parallel.h | 196
1 file changed, 98 insertions, 98 deletions
diff --git a/contrib/libs/llvm12/include/llvm/Support/Parallel.h b/contrib/libs/llvm12/include/llvm/Support/Parallel.h
index ac053bb4cc..4adcf04c2f 100644
--- a/contrib/libs/llvm12/include/llvm/Support/Parallel.h
+++ b/contrib/libs/llvm12/include/llvm/Support/Parallel.h
@@ -18,7 +18,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Error.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Threading.h"
@@ -128,17 +128,17 @@ void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
llvm::Log2_64(std::distance(Start, End)) + 1);
}
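The last argument to the recursive sort above, llvm::Log2_64(std::distance(Start, End)) + 1, appears to be a recursion-depth budget: it bounds how many levels of splitting may spawn parallel subtasks before the sort falls back to a sequential path. A minimal usage sketch of the public parallelSort wrapper that this header exposes around detail::parallel_sort (the vector and values are illustrative, and the wrapper name is assumed from the LLVM API rather than shown in this hunk):

#include "llvm/Support/Parallel.h"
#include <vector>

void sortExample() {
  std::vector<int> V = {5, 3, 9, 1, 7};
  // Runs the parallel quick sort when threading is enabled; with a
  // single-thread strategy it behaves like an ordinary sort.
  llvm::parallelSort(V.begin(), V.end());
}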
-// TaskGroup has a relatively high overhead, so we want to reduce
-// the number of spawn() calls. We'll create up to 1024 tasks here.
-// (Note that 1024 is an arbitrary number. This code probably needs
-// improving to take the number of available cores into account.)
-enum { MaxTasksPerGroup = 1024 };
-
+// TaskGroup has a relatively high overhead, so we want to reduce
+// the number of spawn() calls. We'll create up to 1024 tasks here.
+// (Note that 1024 is an arbitrary number. This code probably needs
+// improving to take the number of available cores into account.)
+enum { MaxTasksPerGroup = 1024 };
+
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
- // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
- // overhead on large inputs.
- ptrdiff_t TaskSize = std::distance(Begin, End) / MaxTasksPerGroup;
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ ptrdiff_t TaskSize = std::distance(Begin, End) / MaxTasksPerGroup;
if (TaskSize == 0)
TaskSize = 1;
@@ -152,9 +152,9 @@ void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
template <class IndexTy, class FuncTy>
void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
- // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
- // overhead on large inputs.
- ptrdiff_t TaskSize = (End - Begin) / MaxTasksPerGroup;
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ ptrdiff_t TaskSize = (End - Begin) / MaxTasksPerGroup;
if (TaskSize == 0)
TaskSize = 1;
@@ -170,50 +170,50 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
Fn(J);
}
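Both for-each variants above chunk the input so that each spawn() handles roughly std::distance(Begin, End) / MaxTasksPerGroup elements instead of a single one, which amortizes the TaskGroup overhead described in the comment. A minimal usage sketch of the public parallelForEach range wrapper declared later in this header (the data and lambda are illustrative):

#include "llvm/Support/Parallel.h"
#include <vector>

void squareAll(std::vector<int> &Values) {
  // Each task mutates a disjoint chunk of Values, so no locking is needed.
  llvm::parallelForEach(Values, [](int &X) { X = X * X; });
}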
-template <class IterTy, class ResultTy, class ReduceFuncTy,
- class TransformFuncTy>
-ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init,
- ReduceFuncTy Reduce,
- TransformFuncTy Transform) {
- // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
- // overhead on large inputs.
- size_t NumInputs = std::distance(Begin, End);
- if (NumInputs == 0)
- return std::move(Init);
- size_t NumTasks = std::min(static_cast<size_t>(MaxTasksPerGroup), NumInputs);
- std::vector<ResultTy> Results(NumTasks, Init);
- {
- // Each task processes either TaskSize or TaskSize+1 inputs. Any inputs
- // remaining after dividing them equally amongst tasks are distributed as
- // one extra input over the first tasks.
- TaskGroup TG;
- size_t TaskSize = NumInputs / NumTasks;
- size_t RemainingInputs = NumInputs % NumTasks;
- IterTy TBegin = Begin;
- for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) {
- IterTy TEnd = TBegin + TaskSize + (TaskId < RemainingInputs ? 1 : 0);
- TG.spawn([=, &Transform, &Reduce, &Results] {
- // Reduce the result of transformation eagerly within each task.
- ResultTy R = Init;
- for (IterTy It = TBegin; It != TEnd; ++It)
- R = Reduce(R, Transform(*It));
- Results[TaskId] = R;
- });
- TBegin = TEnd;
- }
- assert(TBegin == End);
- }
-
- // Do a final reduction. There are at most 1024 tasks, so this only adds
- // constant single-threaded overhead for large inputs. Hopefully most
- // reductions are cheaper than the transformation.
- ResultTy FinalResult = std::move(Results.front());
- for (ResultTy &PartialResult :
- makeMutableArrayRef(Results.data() + 1, Results.size() - 1))
- FinalResult = Reduce(FinalResult, std::move(PartialResult));
- return std::move(FinalResult);
-}
-
+template <class IterTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ size_t NumInputs = std::distance(Begin, End);
+ if (NumInputs == 0)
+ return std::move(Init);
+ size_t NumTasks = std::min(static_cast<size_t>(MaxTasksPerGroup), NumInputs);
+ std::vector<ResultTy> Results(NumTasks, Init);
+ {
+ // Each task processes either TaskSize or TaskSize+1 inputs. Any inputs
+ // remaining after dividing them equally amongst tasks are distributed as
+ // one extra input over the first tasks.
+ TaskGroup TG;
+ size_t TaskSize = NumInputs / NumTasks;
+ size_t RemainingInputs = NumInputs % NumTasks;
+ IterTy TBegin = Begin;
+ for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) {
+ IterTy TEnd = TBegin + TaskSize + (TaskId < RemainingInputs ? 1 : 0);
+ TG.spawn([=, &Transform, &Reduce, &Results] {
+ // Reduce the result of transformation eagerly within each task.
+ ResultTy R = Init;
+ for (IterTy It = TBegin; It != TEnd; ++It)
+ R = Reduce(R, Transform(*It));
+ Results[TaskId] = R;
+ });
+ TBegin = TEnd;
+ }
+ assert(TBegin == End);
+ }
+
+ // Do a final reduction. There are at most 1024 tasks, so this only adds
+ // constant single-threaded overhead for large inputs. Hopefully most
+ // reductions are cheaper than the transformation.
+ ResultTy FinalResult = std::move(Results.front());
+ for (ResultTy &PartialResult :
+ makeMutableArrayRef(Results.data() + 1, Results.size() - 1))
+ FinalResult = Reduce(FinalResult, std::move(PartialResult));
+ return std::move(FinalResult);
+}
+
#endif
} // namespace detail
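The partitioning in parallel_transform_reduce is worth spelling out: with NumTasks = min(1024, NumInputs), every task gets TaskSize = NumInputs / NumTasks elements and the first NumInputs % NumTasks tasks get one extra, so the chunks tile the range exactly (hence the assert(TBegin == End)). A small standalone sketch, not part of the header, that reproduces the same split over index ranges (names are illustrative):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Returns the half-open [begin, end) index pairs produced by the same
// TaskSize / RemainingInputs arithmetic used above.
std::vector<std::pair<size_t, size_t>> splitIntoTasks(size_t NumInputs,
                                                      size_t MaxTasks = 1024) {
  std::vector<std::pair<size_t, size_t>> Chunks;
  if (NumInputs == 0)
    return Chunks;
  size_t NumTasks = std::min(MaxTasks, NumInputs);
  size_t TaskSize = NumInputs / NumTasks;
  size_t RemainingInputs = NumInputs % NumTasks;
  size_t Begin = 0;
  for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) {
    size_t End = Begin + TaskSize + (TaskId < RemainingInputs ? 1 : 0);
    Chunks.push_back({Begin, End});
    Begin = End;
  }
  assert(Begin == NumInputs && "chunks must tile the whole range");
  return Chunks;
}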
@@ -256,22 +256,22 @@ void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) {
Fn(I);
}
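parallelForEachN is the index-based counterpart: it hands every size_t in [Begin, End) to Fn, which is convenient when the body needs the index to write into a separate output buffer. A minimal usage sketch (the buffers and the doubling transform are illustrative):

#include "llvm/Support/Parallel.h"
#include <vector>

void doubleInto(const std::vector<int> &In, std::vector<int> &Out) {
  Out.resize(In.size());
  // Each index is handled by exactly one task, so the writes do not race.
  llvm::parallelForEachN(0, In.size(), [&](size_t I) { Out[I] = 2 * In[I]; });
}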
-template <class IterTy, class ResultTy, class ReduceFuncTy,
- class TransformFuncTy>
-ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init,
- ReduceFuncTy Reduce,
- TransformFuncTy Transform) {
-#if LLVM_ENABLE_THREADS
- if (parallel::strategy.ThreadsRequested != 1) {
- return parallel::detail::parallel_transform_reduce(Begin, End, Init, Reduce,
- Transform);
- }
-#endif
- for (IterTy I = Begin; I != End; ++I)
- Init = Reduce(std::move(Init), Transform(*I));
- return std::move(Init);
-}
-
+template <class IterTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+#if LLVM_ENABLE_THREADS
+ if (parallel::strategy.ThreadsRequested != 1) {
+ return parallel::detail::parallel_transform_reduce(Begin, End, Init, Reduce,
+ Transform);
+ }
+#endif
+ for (IterTy I = Begin; I != End; ++I)
+ Init = Reduce(std::move(Init), Transform(*I));
+ return std::move(Init);
+}
+
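A minimal usage sketch of the wrapper above: summing the squares of a vector, where the transform runs in parallel over chunks and the per-task partial sums are folded together on the calling thread (the data and lambdas are illustrative):

#include "llvm/Support/Parallel.h"
#include <cstdint>
#include <vector>

int64_t sumOfSquares(const std::vector<int> &Values) {
  // Init must be copyable: each task seeds its partial result from a copy.
  return llvm::parallelTransformReduce(
      Values.begin(), Values.end(), int64_t(0),
      /*Reduce=*/[](int64_t A, int64_t B) { return A + B; },
      /*Transform=*/[](int X) { return int64_t(X) * X; });
}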
// Range wrappers.
template <class RangeTy,
class Comparator = std::less<decltype(*std::begin(RangeTy()))>>
@@ -284,31 +284,31 @@ void parallelForEach(RangeTy &&R, FuncTy Fn) {
parallelForEach(std::begin(R), std::end(R), Fn);
}
-template <class RangeTy, class ResultTy, class ReduceFuncTy,
- class TransformFuncTy>
-ResultTy parallelTransformReduce(RangeTy &&R, ResultTy Init,
- ReduceFuncTy Reduce,
- TransformFuncTy Transform) {
- return parallelTransformReduce(std::begin(R), std::end(R), Init, Reduce,
- Transform);
-}
-
-// Parallel for-each, but with error handling.
-template <class RangeTy, class FuncTy>
-Error parallelForEachError(RangeTy &&R, FuncTy Fn) {
- // The transform_reduce algorithm requires that the initial value be copyable.
- // Error objects are uncopyable. We only need to copy initial success values,
- // so work around this mismatch via the C API. The C API represents success
- // values with a null pointer. The joinErrors discards null values and joins
- // multiple errors into an ErrorList.
- return unwrap(parallelTransformReduce(
- std::begin(R), std::end(R), wrap(Error::success()),
- [](LLVMErrorRef Lhs, LLVMErrorRef Rhs) {
- return wrap(joinErrors(unwrap(Lhs), unwrap(Rhs)));
- },
- [&Fn](auto &&V) { return wrap(Fn(V)); }));
-}
-
+template <class RangeTy, class ResultTy, class ReduceFuncTy,
+ class TransformFuncTy>
+ResultTy parallelTransformReduce(RangeTy &&R, ResultTy Init,
+ ReduceFuncTy Reduce,
+ TransformFuncTy Transform) {
+ return parallelTransformReduce(std::begin(R), std::end(R), Init, Reduce,
+ Transform);
+}
+
+// Parallel for-each, but with error handling.
+template <class RangeTy, class FuncTy>
+Error parallelForEachError(RangeTy &&R, FuncTy Fn) {
+ // The transform_reduce algorithm requires that the initial value be copyable.
+ // Error objects are uncopyable. We only need to copy initial success values,
+ // so work around this mismatch via the C API. The C API represents success
+ // values with a null pointer. The joinErrors discards null values and joins
+ // multiple errors into an ErrorList.
+ return unwrap(parallelTransformReduce(
+ std::begin(R), std::end(R), wrap(Error::success()),
+ [](LLVMErrorRef Lhs, LLVMErrorRef Rhs) {
+ return wrap(joinErrors(unwrap(Lhs), unwrap(Rhs)));
+ },
+ [&Fn](auto &&V) { return wrap(Fn(V)); }));
+}
+
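A minimal usage sketch of parallelForEachError: each callback returns an Error, failures from all elements are joined into one ErrorList, and overall success comes back as Error::success() (the path check and error message are illustrative):

#include "llvm/Support/Error.h"
#include "llvm/Support/Parallel.h"
#include <string>
#include <vector>

llvm::Error checkAll(const std::vector<std::string> &Paths) {
  return llvm::parallelForEachError(Paths, [](const std::string &P) -> llvm::Error {
    if (P.empty())
      return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                     "empty input path");
    return llvm::Error::success();
  });
}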
} // namespace llvm
#endif // LLVM_SUPPORT_PARALLEL_H