| author | thegeorg <thegeorg@yandex-team.ru> | 2022-06-03 10:53:07 +0300 |
|---|---|---|
| committer | thegeorg <thegeorg@yandex-team.ru> | 2022-06-03 10:53:07 +0300 |
| commit | a1d4361e379e2c72a469ad1bd64569cbc2db131f (patch) | |
| tree | 0caddb240a10132376e4653a31578e117d33f9fd /contrib/libs/cxxsupp/openmp/kmp_tasking.cpp | |
| parent | 41f55a521834080d9d703c099c0418cfff3a0546 (diff) | |
| download | ydb-a1d4361e379e2c72a469ad1bd64569cbc2db131f.tar.gz | |
Update contrib/libs/cxxsupp/openmp to 14.0.4
ref:77c6cdda99b217d50c4deadca11f5611fa0dc168
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/kmp_tasking.cpp')
-rw-r--r-- | contrib/libs/cxxsupp/openmp/kmp_tasking.cpp | 212 |
1 file changed, 129 insertions, 83 deletions
```diff
diff --git a/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp b/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
index 55e9c30763..e445438524 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
@@ -324,10 +324,16 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
 
-  // We don't need to map to shadow gtid if it is already hidden helper thread
-  if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) {
-    gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
-    thread = __kmp_threads[gtid];
+  // If we encounter a hidden helper task, and the current thread is not a
+  // hidden helper thread, we have to give the task to any hidden helper thread
+  // starting from its shadow one.
+  if (UNLIKELY(taskdata->td_flags.hidden_helper &&
+               !KMP_HIDDEN_HELPER_THREAD(gtid))) {
+    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
+    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
+    // Signal the hidden helper threads.
+    __kmp_hidden_helper_worker_thread_signal();
+    return TASK_SUCCESSFULLY_PUSHED;
   }
 
   kmp_task_team_t *task_team = thread->th.th_task_team;
@@ -434,16 +440,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
                 gtid, taskdata, thread_data->td.td_deque_ntasks,
                 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
 
-  auto hidden_helper = taskdata->td_flags.hidden_helper;
-
   __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
 
-  // Signal one worker thread to execute the task
-  if (UNLIKELY(hidden_helper)) {
-    // Wake hidden helper threads up if they're sleeping
-    __kmp_hidden_helper_worker_thread_signal();
-  }
-
   return TASK_SUCCESSFULLY_PUSHED;
 }
 
@@ -809,6 +807,24 @@ static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                 gtid, taskdata, children));
 }
 
+// Only need to keep track of child task counts if any of the following:
+// 1. team parallel and tasking not serialized;
+// 2. it is a proxy or detachable or hidden helper task
+// 3. the children counter of its parent task is greater than 0.
+// The reason for the 3rd one is for serialized team that found detached task,
+// hidden helper task, T. In this case, the execution of T is still deferred,
+// and it is also possible that a regular task depends on T. In this case, if we
+// don't track the children, task synchronization will be broken.
+static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
+  kmp_tasking_flags_t flags = taskdata->td_flags;
+  bool ret = !(flags.team_serial || flags.tasking_ser);
+  ret = ret || flags.proxy == TASK_PROXY ||
+        flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
+  ret = ret ||
+        KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
+  return ret;
+}
+
 // __kmp_task_finish: bookkeeping to do when a task finishes execution
 //
 // gtid: global thread ID for calling thread
@@ -825,8 +841,9 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_task_team_t *task_team =
       thread->th.th_task_team; // might be NULL for serial teams...
+#if KMP_DEBUG
   kmp_int32 children = 0;
-
+#endif
   KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                 "task %p\n",
                 gtid, taskdata, resumed_task));
@@ -934,16 +951,15 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
     if (ompt)
       __ompt_task_finish(task, resumed_task, ompt_task_complete);
 #endif
-
-    // Only need to keep track of count if team parallel and tasking not
-    // serialized, or task is detachable and event has already been fulfilled
-    if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
-        taskdata->td_flags.detachable == TASK_DETACHABLE ||
-        taskdata->td_flags.hidden_helper) {
+    // TODO: What would be the balance between the conditions in the function
+    // and an atomic operation?
+    if (__kmp_track_children_task(taskdata)) {
       __kmp_release_deps(gtid, taskdata);
       // Predecrement simulated by "- 1" calculation
-      children =
-          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
+#if KMP_DEBUG
+      children = -1 +
+#endif
+          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
       KMP_DEBUG_ASSERT(children >= 0);
       if (taskdata->td_taskgroup)
         KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
@@ -1189,7 +1205,6 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   kmp_task_t *task;
   kmp_taskdata_t *taskdata;
   kmp_info_t *thread = __kmp_threads[gtid];
-  kmp_info_t *encountering_thread = thread;
   kmp_team_t *team = thread->th.th_team;
   kmp_taskdata_t *parent_task = thread->th.th_current_task;
   size_t shareds_offset;
@@ -1201,15 +1216,6 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
     if (__kmp_enable_hidden_helper) {
       if (!TCR_4(__kmp_init_hidden_helper))
         __kmp_hidden_helper_initialize();
-
-      // For a hidden helper task encountered by a regular thread, we will push
-      // the task to the (gtid%__kmp_hidden_helper_threads_num)-th hidden helper
-      // thread.
-      if (!KMP_HIDDEN_HELPER_THREAD(gtid)) {
-        thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
-        // We don't change the parent-child relation for hidden helper task as
-        // we need that to do per-task-region synchronization.
-      }
     } else {
       // If the hidden helper task is not enabled, reset the flag to FALSE.
       flags->hidden_helper = FALSE;
     }
@@ -1232,8 +1238,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
     // Untied task encountered causes the TSC algorithm to check entire deque of
     // the victim thread. If no untied task encountered, then checking the head
     // of the deque should be enough.
-    KMP_CHECK_UPDATE(
-        encountering_thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
+    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
   }
 
   // Detachable tasks are not proxy tasks yet but could be in the future. Doing
@@ -1247,32 +1252,30 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   }
 
   /* are we running in a sequential parallel or tskm_immediate_exec... we need
      tasking support enabled */
-  if ((encountering_thread->th.th_task_team) == NULL) {
+  if ((thread->th.th_task_team) == NULL) {
     /* This should only happen if the team is serialized
         setup a task team and propagate it to the thread */
     KMP_DEBUG_ASSERT(team->t.t_serialized);
     KA_TRACE(30,
              ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
               gtid));
-    __kmp_task_team_setup(
-        encountering_thread, team,
-        1); // 1 indicates setup the current team regardless of nthreads
-    encountering_thread->th.th_task_team =
-        team->t.t_task_team[encountering_thread->th.th_task_state];
+    // 1 indicates setup the current team regardless of nthreads
+    __kmp_task_team_setup(thread, team, 1);
+    thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
   }
-  kmp_task_team_t *task_team = encountering_thread->th.th_task_team;
+  kmp_task_team_t *task_team = thread->th.th_task_team;
 
   /* tasking must be enabled now as the task might not be pushed */
   if (!KMP_TASKING_ENABLED(task_team)) {
     KA_TRACE(
         30,
         ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
-    __kmp_enable_tasking(task_team, encountering_thread);
-    kmp_int32 tid = encountering_thread->th.th_info.ds.ds_tid;
+    __kmp_enable_tasking(task_team, thread);
+    kmp_int32 tid = thread->th.th_info.ds.ds_tid;
     kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
     // No lock needed since only owner can allocate
     if (thread_data->td.td_deque == NULL) {
-      __kmp_alloc_task_deque(encountering_thread, thread_data);
+      __kmp_alloc_task_deque(thread, thread_data);
     }
   }
@@ -1297,11 +1300,11 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
 
   // Avoid double allocation here by combining shareds with taskdata
 #if USE_FAST_MEMORY
-  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(
-      encountering_thread, shareds_offset + sizeof_shareds);
+  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
+                                                               sizeof_shareds);
 #else /* ! USE_FAST_MEMORY */
-  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(
-      encountering_thread, shareds_offset + sizeof_shareds);
+  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
+                                                               sizeof_shareds);
 #endif /* USE_FAST_MEMORY */
 
   task = KMP_TASKDATA_TO_TASK(taskdata);
@@ -1328,7 +1331,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
 
   taskdata->td_task_id = KMP_GEN_TASK_ID();
   taskdata->td_team = thread->th.th_team;
-  taskdata->td_alloc_thread = encountering_thread;
+  taskdata->td_alloc_thread = thread;
   taskdata->td_parent = parent_task;
   taskdata->td_level = parent_task->td_level + 1; // increment nesting level
   KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
@@ -1342,10 +1345,16 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
 
   taskdata->td_flags = *flags;
-  taskdata->encountering_gtid = gtid;
   taskdata->td_task_team = thread->th.th_task_team;
   taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
   taskdata->td_flags.tasktype = TASK_EXPLICIT;
+  // If it is hidden helper task, we need to set the team and task team
+  // correspondingly.
+  if (flags->hidden_helper) {
+    kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
+    taskdata->td_team = shadow_thread->th.th_team;
+    taskdata->td_task_team = shadow_thread->th.th_task_team;
+  }
 
   // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
   taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
@@ -1382,11 +1391,9 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   if (UNLIKELY(ompt_enabled.enabled))
     __ompt_task_init(taskdata, gtid);
 #endif
-  // Only need to keep track of child task counts if team parallel and tasking
-  // not serialized or if it is a proxy or detachable or hidden helper task
-  if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE ||
-      flags->hidden_helper ||
-      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
+  // TODO: What would be the balance between the conditions in the function and
+  // an atomic operation?
+  if (__kmp_track_children_task(taskdata)) {
     KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
     if (parent_task->td_taskgroup)
       KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
@@ -1438,11 +1445,12 @@ kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                          size_t sizeof_shareds,
                                          kmp_routine_entry_t task_entry,
                                          kmp_int64 device_id) {
-  if (__kmp_enable_hidden_helper) {
-    auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
+  auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
+  // target task is untied defined in the specification
+  input_flags.tiedness = TASK_UNTIED;
+
+  if (__kmp_enable_hidden_helper)
     input_flags.hidden_helper = TRUE;
-    input_flags.tiedness = TASK_UNTIED;
-  }
 
   return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                                sizeof_shareds, task_entry);
@@ -1613,13 +1621,15 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
     KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
 #endif
 
+    if (task->routine != NULL) {
 #ifdef KMP_GOMP_COMPAT
-    if (taskdata->td_flags.native) {
-      ((void (*)(void *))(*(task->routine)))(task->shareds);
-    } else
+      if (taskdata->td_flags.native) {
+        ((void (*)(void *))(*(task->routine)))(task->shareds);
+      } else
 #endif /* KMP_GOMP_COMPAT */
-    {
-      (*(task->routine))(gtid, task);
+      {
+        (*(task->routine))(gtid, task);
+      }
     }
     KMP_POP_PARTITIONED_TIMER();
@@ -2833,15 +2843,14 @@ static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
     // We need to un-mark this victim as a finished victim. This must be done
     // before releasing the lock, or else other threads (starting with the
     // primary thread victim) might be prematurely released from the barrier!!!
-    kmp_int32 count;
-
-    count = KMP_ATOMIC_INC(unfinished_threads);
-
+#if KMP_DEBUG
+    kmp_int32 count =
+#endif
+        KMP_ATOMIC_INC(unfinished_threads);
     KA_TRACE(
         20,
         ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
          gtid, count + 1, task_team));
-
     *thread_finished = FALSE;
   }
   TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
@@ -2948,8 +2957,7 @@ static inline int __kmp_execute_tasks_template(
               (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) != NULL)) {
             asleep = 1;
-            __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
-                                      other_thread->th.th_sleep_loc);
+            __kmp_null_resume_wrapper(other_thread);
             // A sleeping thread should not have any tasks on it's queue.
             // There is a slight possibility that it resumes, steals a task
             // from another thread, which spawns more tasks, all in the time
@@ -3034,9 +3042,10 @@ static inline int __kmp_execute_tasks_template(
       // done. This decrement might be to the spin location, and result in the
       // termination condition being satisfied.
       if (!*thread_finished) {
-        kmp_int32 count;
-
-        count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
+#if KMP_DEBUG
+        kmp_int32 count = -1 +
+#endif
+            KMP_ATOMIC_DEC(unfinished_threads);
         KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                       "unfinished_threads to %d task_team=%p\n",
                       gtid, count, task_team));
@@ -3065,6 +3074,18 @@ static inline int __kmp_execute_tasks_template(
       return FALSE;
     }
 
+    // Check the flag again to see if it has already done in case to be trapped
+    // into infinite loop when a if0 task depends on a hidden helper task
+    // outside any parallel region. Detached tasks are not impacted in this case
+    // because the only thread executing this function has to execute the proxy
+    // task so it is in another code path that has the same check.
+    if (flag == NULL || (!final_spin && flag->done_check())) {
+      KA_TRACE(15,
+               ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
+                gtid));
+      return TRUE;
+    }
+
     // We could be getting tasks from target constructs; if this is the only
     // thread, keep trying to execute tasks from own queue
     if (nthreads == 1 &&
@@ -3098,6 +3119,16 @@ int __kmp_execute_tasks_64(
       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
 }
 
+template <bool C, bool S>
+int __kmp_atomic_execute_tasks_64(
+    kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
+    int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+    kmp_int32 is_constrained) {
+  return __kmp_execute_tasks_template(
+      thread, gtid, flag, final_spin,
+      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
 int __kmp_execute_tasks_oncore(
     kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -3124,6 +3155,14 @@ template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
                                                  int *USE_ITT_BUILD_ARG(void *),
                                                  kmp_int32);
 
+template int __kmp_atomic_execute_tasks_64<false, true>(
+    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
+    int *USE_ITT_BUILD_ARG(void *), kmp_int32);
+
+template int __kmp_atomic_execute_tasks_64<true, false>(
+    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
+    int *USE_ITT_BUILD_ARG(void *), kmp_int32);
+
 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
 // next barrier so they can assist in executing enqueued tasks.
 // First thread in allocates the task team atomically.
@@ -3162,7 +3201,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
     // tasks and execute them. In extra barrier mode, tasks do not sleep
     // at the separate tasking barrier, so this isn't a problem.
     for (i = 0; i < nthreads; i++) {
-      volatile void *sleep_loc;
+      void *sleep_loc;
       kmp_info_t *thread = threads_data[i].td.td_thr;
 
       if (i == this_thr->th.th_info.ds.ds_tid) {
@@ -3179,7 +3218,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
         KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                       __kmp_gtid_from_thread(this_thr),
                       __kmp_gtid_from_thread(thread)));
-        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+        __kmp_null_resume_wrapper(thread);
       } else {
         KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                       __kmp_gtid_from_thread(this_thr),
@@ -3451,6 +3490,7 @@ static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
 
   TCW_4(task_team->tt.tt_found_tasks, FALSE);
   TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+  TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
   task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
 
   KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
@@ -3512,9 +3552,11 @@ void __kmp_reap_task_teams(void) {
 void __kmp_wait_to_unref_task_teams(void) {
   kmp_info_t *thread;
   kmp_uint32 spins;
+  kmp_uint64 time;
   int done;
 
   KMP_INIT_YIELD(spins);
+  KMP_INIT_BACKOFF(time);
 
   for (;;) {
     done = TRUE;
@@ -3547,7 +3589,7 @@ void __kmp_wait_to_unref_task_teams(void) {
                     __kmp_gtid_from_thread(thread)));
 
       if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
-        volatile void *sleep_loc;
+        void *sleep_loc;
         // If the thread is sleeping, awaken it.
        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
            NULL) {
@@ -3555,7 +3597,7 @@ void __kmp_wait_to_unref_task_teams(void) {
               10,
               ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
                __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
-          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+          __kmp_null_resume_wrapper(thread);
         }
       }
     }
@@ -3564,7 +3606,7 @@ void __kmp_wait_to_unref_task_teams(void) {
     }
 
     // If oversubscribed or have waited a bit, yield.
-    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
   }
 }
@@ -3613,6 +3655,7 @@ void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
         TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
         TCW_4(task_team->tt.tt_found_tasks, FALSE);
         TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+        TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
         KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
                           team->t.t_nproc);
         TCW_4(task_team->tt.tt_active, TRUE);
@@ -3705,8 +3748,10 @@ void __kmp_task_team_wait(
          "setting active to false, setting local and team's pointer to NULL\n",
          __kmp_gtid_from_thread(this_thr), task_team));
     KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
-                     task_team->tt.tt_found_proxy_tasks == TRUE);
+                     task_team->tt.tt_found_proxy_tasks == TRUE ||
+                     task_team->tt.tt_hidden_helper_task_encountered == TRUE);
     TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+    TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
     KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
     TCW_SYNC_4(task_team->tt.tt_active, FALSE);
     KMP_MB();
@@ -3869,11 +3914,12 @@ static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
 }
 
 static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
+#if KMP_DEBUG
   kmp_int32 children = 0;
-
   // Predecrement simulated by "- 1" calculation
-  children =
-      KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
+  children = -1 +
+#endif
+      KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
   KMP_DEBUG_ASSERT(children >= 0);
 
   // Remove the imaginary children
@@ -3936,7 +3982,7 @@ void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
 
   // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
   // but we cannot use __kmp_get_random here
-  kmp_int32 start_k = start;
+  kmp_int32 start_k = start % nthreads;
   kmp_int32 pass = 1;
   kmp_int32 k = start_k;
```