path: root/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
author     thegeorg <thegeorg@yandex-team.ru>    2022-06-03 10:53:07 +0300
committer  thegeorg <thegeorg@yandex-team.ru>    2022-06-03 10:53:07 +0300
commit     a1d4361e379e2c72a469ad1bd64569cbc2db131f (patch)
tree       0caddb240a10132376e4653a31578e117d33f9fd  /contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
parent     41f55a521834080d9d703c099c0418cfff3a0546 (diff)
download   ydb-a1d4361e379e2c72a469ad1bd64569cbc2db131f.tar.gz
Update contrib/libs/cxxsupp/openmp to 14.0.4
ref:77c6cdda99b217d50c4deadca11f5611fa0dc168
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/kmp_tasking.cpp')
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_tasking.cpp  212
1 file changed, 129 insertions, 83 deletions
diff --git a/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp b/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
index 55e9c30763..e445438524 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
@@ -324,10 +324,16 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
- // We don't need to map to shadow gtid if it is already hidden helper thread
- if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) {
- gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
- thread = __kmp_threads[gtid];
+ // If we encounter a hidden helper task, and the current thread is not a
+ // hidden helper thread, we have to hand the task over to a hidden helper
+ // thread, starting from its shadow one.
+ if (UNLIKELY(taskdata->td_flags.hidden_helper &&
+ !KMP_HIDDEN_HELPER_THREAD(gtid))) {
+ kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
+ __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
+ // Signal the hidden helper threads.
+ __kmp_hidden_helper_worker_thread_signal();
+ return TASK_SUCCESSFULLY_PUSHED;
}
kmp_task_team_t *task_team = thread->th.th_task_team;
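
The new push path above never runs a hidden helper task on a regular worker:
the encountering thread hands the task off to a helper chosen from its own
gtid and then signals the helper pool. A minimal sketch of that selection idea
with purely hypothetical names (the real KMP_GTID_TO_SHADOW_GTID and
__kmpc_give_task live in the runtime):

    // Hypothetical illustration of the shadow-gtid hand-off; not the
    // runtime's actual mapping.
    int pick_helper_gtid(int encountering_gtid, int num_helpers,
                         int first_helper_gtid) {
      // Associate each regular thread with one "shadow" helper by a fixed
      // function of its own gtid, spreading pushes round-robin over helpers.
      return first_helper_gtid + encountering_gtid % num_helpers;
    }
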
@@ -434,16 +440,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
gtid, taskdata, thread_data->td.td_deque_ntasks,
thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
- auto hidden_helper = taskdata->td_flags.hidden_helper;
-
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
- // Signal one worker thread to execute the task
- if (UNLIKELY(hidden_helper)) {
- // Wake hidden helper threads up if they're sleeping
- __kmp_hidden_helper_worker_thread_signal();
- }
-
return TASK_SUCCESSFULLY_PUSHED;
}
@@ -809,6 +807,24 @@ static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
gtid, taskdata, children));
}
+// Only need to keep track of child task counts if any of the following hold:
+// 1. the team is parallel and tasking is not serialized;
+// 2. it is a proxy, detachable, or hidden helper task;
+// 3. the children counter of its parent task is greater than 0.
+// The reason for the 3rd one is a serialized team that encounters a detached
+// or hidden helper task T. In this case, the execution of T is still deferred,
+// and it is also possible that a regular task depends on T; if we do not track
+// the children there, task synchronization will be broken.
+static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
+ kmp_tasking_flags_t flags = taskdata->td_flags;
+ bool ret = !(flags.team_serial || flags.tasking_ser);
+ ret = ret || flags.proxy == TASK_PROXY ||
+ flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
+ ret = ret ||
+ KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
+ return ret;
+}
+
// __kmp_task_finish: bookkeeping to do when a task finishes execution
//
// gtid: global thread ID for calling thread
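
Condition 3 in __kmp_track_children_task above matters for programs like the
following sketch (illustrative only, not taken from the patch or its tests):
the implicit team outside any parallel region is serialized, yet the deferred
hidden helper task's completion still has to be counted for the dependence and
the taskwait to work.

    #include <omp.h>
    int main() {
      int x = 0;
      // Deferred target task T, executed by a hidden helper thread.
      #pragma omp target nowait depend(out : x) map(tofrom : x)
      { x = 1; }
      // A regular task that depends on T; if T were not tracked as an
      // incomplete child of the implicit task, this dependence and the
      // taskwait below could be released too early.
      #pragma omp task depend(in : x) shared(x)
      { (void)x; }
      #pragma omp taskwait
      return 0;
    }
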
@@ -825,8 +841,9 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
kmp_info_t *thread = __kmp_threads[gtid];
kmp_task_team_t *task_team =
thread->th.th_task_team; // might be NULL for serial teams...
+#if KMP_DEBUG
kmp_int32 children = 0;
-
+#endif
KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
"task %p\n",
gtid, taskdata, resumed_task));
@@ -934,16 +951,15 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
if (ompt)
__ompt_task_finish(task, resumed_task, ompt_task_complete);
#endif
-
- // Only need to keep track of count if team parallel and tasking not
- // serialized, or task is detachable and event has already been fulfilled
- if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
- taskdata->td_flags.detachable == TASK_DETACHABLE ||
- taskdata->td_flags.hidden_helper) {
+ // TODO: What would be the balance between the conditions in the function
+ // and an atomic operation?
+ if (__kmp_track_children_task(taskdata)) {
__kmp_release_deps(gtid, taskdata);
// Predecrement simulated by "- 1" calculation
- children =
- KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
+#if KMP_DEBUG
+ children = -1 +
+#endif
+ KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
KMP_DEBUG_ASSERT(children >= 0);
if (taskdata->td_taskgroup)
KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
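
The #if KMP_DEBUG construction above keeps the atomic decrement in release
builds while only materializing its result where the assertion needs it. The
same pattern in isolation, with std::atomic standing in for KMP_ATOMIC_DEC
(a sketch, not the runtime's code):

    #include <atomic>
    #include <cassert>
    std::atomic<int> incomplete_child_tasks{1};
    void on_child_finished() {
    #if KMP_DEBUG
      // fetch_sub returns the old value; "-1 +" simulates a pre-decrement.
      int children = -1 +
    #endif
          incomplete_child_tasks.fetch_sub(1);
    #if KMP_DEBUG
      assert(children >= 0);
    #endif
    }
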
@@ -1189,7 +1205,6 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task;
kmp_taskdata_t *taskdata;
kmp_info_t *thread = __kmp_threads[gtid];
- kmp_info_t *encountering_thread = thread;
kmp_team_t *team = thread->th.th_team;
kmp_taskdata_t *parent_task = thread->th.th_current_task;
size_t shareds_offset;
@@ -1201,15 +1216,6 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
if (__kmp_enable_hidden_helper) {
if (!TCR_4(__kmp_init_hidden_helper))
__kmp_hidden_helper_initialize();
-
- // For a hidden helper task encountered by a regular thread, we will push
- // the task to the (gtid%__kmp_hidden_helper_threads_num)-th hidden helper
- // thread.
- if (!KMP_HIDDEN_HELPER_THREAD(gtid)) {
- thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
- // We don't change the parent-child relation for hidden helper task as
- // we need that to do per-task-region synchronization.
- }
} else {
// If the hidden helper task is not enabled, reset the flag to FALSE.
flags->hidden_helper = FALSE;
@@ -1232,8 +1238,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
// Untied task encountered causes the TSC algorithm to check entire deque of
// the victim thread. If no untied task encountered, then checking the head
// of the deque should be enough.
- KMP_CHECK_UPDATE(
- encountering_thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
+ KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
}
// Detachable tasks are not proxy tasks yet but could be in the future. Doing
@@ -1247,32 +1252,30 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
}
/* are we running in a sequential parallel or tskm_immediate_exec... we need
tasking support enabled */
- if ((encountering_thread->th.th_task_team) == NULL) {
+ if ((thread->th.th_task_team) == NULL) {
/* This should only happen if the team is serialized
setup a task team and propagate it to the thread */
KMP_DEBUG_ASSERT(team->t.t_serialized);
KA_TRACE(30,
("T#%d creating task team in __kmp_task_alloc for proxy task\n",
gtid));
- __kmp_task_team_setup(
- encountering_thread, team,
- 1); // 1 indicates setup the current team regardless of nthreads
- encountering_thread->th.th_task_team =
- team->t.t_task_team[encountering_thread->th.th_task_state];
+ // 1 indicates setup the current team regardless of nthreads
+ __kmp_task_team_setup(thread, team, 1);
+ thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
}
- kmp_task_team_t *task_team = encountering_thread->th.th_task_team;
+ kmp_task_team_t *task_team = thread->th.th_task_team;
/* tasking must be enabled now as the task might not be pushed */
if (!KMP_TASKING_ENABLED(task_team)) {
KA_TRACE(
30,
("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
- __kmp_enable_tasking(task_team, encountering_thread);
- kmp_int32 tid = encountering_thread->th.th_info.ds.ds_tid;
+ __kmp_enable_tasking(task_team, thread);
+ kmp_int32 tid = thread->th.th_info.ds.ds_tid;
kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
// No lock needed since only owner can allocate
if (thread_data->td.td_deque == NULL) {
- __kmp_alloc_task_deque(encountering_thread, thread_data);
+ __kmp_alloc_task_deque(thread, thread_data);
}
}
@@ -1297,11 +1300,11 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
// Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
- taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(
- encountering_thread, shareds_offset + sizeof_shareds);
+ taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
+ sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
- taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(
- encountering_thread, shareds_offset + sizeof_shareds);
+ taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
+ sizeof_shareds);
#endif /* USE_FAST_MEMORY */
task = KMP_TASKDATA_TO_TASK(taskdata);
@@ -1328,7 +1331,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
taskdata->td_task_id = KMP_GEN_TASK_ID();
taskdata->td_team = thread->th.th_team;
- taskdata->td_alloc_thread = encountering_thread;
+ taskdata->td_alloc_thread = thread;
taskdata->td_parent = parent_task;
taskdata->td_level = parent_task->td_level + 1; // increment nesting level
KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
@@ -1342,10 +1345,16 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
taskdata->td_flags = *flags;
- taskdata->encountering_gtid = gtid;
taskdata->td_task_team = thread->th.th_task_team;
taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
taskdata->td_flags.tasktype = TASK_EXPLICIT;
+ // If it is a hidden helper task, we need to set the team and task team
+ // accordingly.
+ if (flags->hidden_helper) {
+ kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
+ taskdata->td_team = shadow_thread->th.th_team;
+ taskdata->td_task_team = shadow_thread->th.th_task_team;
+ }
// GEH - TODO: fix this to copy parent task's value of tasking_ser flag
taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
@@ -1382,11 +1391,9 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_init(taskdata, gtid);
#endif
- // Only need to keep track of child task counts if team parallel and tasking
- // not serialized or if it is a proxy or detachable or hidden helper task
- if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE ||
- flags->hidden_helper ||
- !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
+ // TODO: What would be the balance between the conditions in the function and
+ // an atomic operation?
+ if (__kmp_track_children_task(taskdata)) {
KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
if (parent_task->td_taskgroup)
KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
@@ -1438,11 +1445,12 @@ kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
size_t sizeof_shareds,
kmp_routine_entry_t task_entry,
kmp_int64 device_id) {
- if (__kmp_enable_hidden_helper) {
- auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
+ auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
+ // target tasks are defined as untied in the specification
+ input_flags.tiedness = TASK_UNTIED;
+
+ if (__kmp_enable_hidden_helper)
input_flags.hidden_helper = TRUE;
- input_flags.tiedness = TASK_UNTIED;
- }
return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
sizeof_shareds, task_entry);
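
The hunk above views the incoming integer flags word through a bit-field
struct so individual task flags can be set before allocation. A minimal sketch
of that aliasing idea, with hypothetical field names rather than the runtime's
real kmp_tasking_flags_t layout:

    #include <cstdint>
    struct task_flags_view {
      std::uint32_t tiedness : 1;      // assume 0 = untied, 1 = tied
      std::uint32_t hidden_helper : 1; // run on a hidden helper thread
      std::uint32_t reserved : 30;
    };
    static_assert(sizeof(task_flags_view) == sizeof(std::int32_t),
                  "the view must alias the integer flags word");
    void mark_target_task(std::int32_t &flags, bool helpers_enabled) {
      auto &f = reinterpret_cast<task_flags_view &>(flags);
      f.tiedness = 0; // target tasks are untied per the OpenMP specification
      if (helpers_enabled)
        f.hidden_helper = 1;
    }
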
@@ -1613,13 +1621,15 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
#endif
+ if (task->routine != NULL) {
#ifdef KMP_GOMP_COMPAT
- if (taskdata->td_flags.native) {
- ((void (*)(void *))(*(task->routine)))(task->shareds);
- } else
+ if (taskdata->td_flags.native) {
+ ((void (*)(void *))(*(task->routine)))(task->shareds);
+ } else
#endif /* KMP_GOMP_COMPAT */
- {
- (*(task->routine))(gtid, task);
+ {
+ (*(task->routine))(gtid, task);
+ }
}
KMP_POP_PARTITIONED_TIMER();
@@ -2833,15 +2843,14 @@ static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
// We need to un-mark this victim as a finished victim. This must be done
// before releasing the lock, or else other threads (starting with the
// primary thread victim) might be prematurely released from the barrier!!!
- kmp_int32 count;
-
- count = KMP_ATOMIC_INC(unfinished_threads);
-
+#if KMP_DEBUG
+ kmp_int32 count =
+#endif
+ KMP_ATOMIC_INC(unfinished_threads);
KA_TRACE(
20,
("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
gtid, count + 1, task_team));
-
*thread_finished = FALSE;
}
TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
@@ -2948,8 +2957,7 @@ static inline int __kmp_execute_tasks_template(
(TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
NULL)) {
asleep = 1;
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
- other_thread->th.th_sleep_loc);
+ __kmp_null_resume_wrapper(other_thread);
// A sleeping thread should not have any tasks on it's queue.
// There is a slight possibility that it resumes, steals a task
// from another thread, which spawns more tasks, all in the time
@@ -3034,9 +3042,10 @@ static inline int __kmp_execute_tasks_template(
// done. This decrement might be to the spin location, and result in the
// termination condition being satisfied.
if (!*thread_finished) {
- kmp_int32 count;
-
- count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
+#if KMP_DEBUG
+ kmp_int32 count = -1 +
+#endif
+ KMP_ATOMIC_DEC(unfinished_threads);
KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
"unfinished_threads to %d task_team=%p\n",
gtid, count, task_team));
@@ -3065,6 +3074,18 @@ static inline int __kmp_execute_tasks_template(
return FALSE;
}
+ // Check the flag again to see if the wait is already done, so we do not get
+ // trapped in an infinite loop when an if0 task depends on a hidden helper
+ // task outside any parallel region. Detached tasks are not affected in this
+ // case because the only thread executing this function has to execute the
+ // proxy task, so it is on another code path that has the same check.
+ if (flag == NULL || (!final_spin && flag->done_check())) {
+ KA_TRACE(15,
+ ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
+ gtid));
+ return TRUE;
+ }
+
// We could be getting tasks from target constructs; if this is the only
// thread, keep trying to execute tasks from own queue
if (nthreads == 1 &&
@@ -3098,6 +3119,16 @@ int __kmp_execute_tasks_64(
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
+template <bool C, bool S>
+int __kmp_atomic_execute_tasks_64(
+ kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
+ int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_template(
+ thread, gtid, flag, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
int __kmp_execute_tasks_oncore(
kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -3124,6 +3155,14 @@ template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
int *USE_ITT_BUILD_ARG(void *),
kmp_int32);
+template int __kmp_atomic_execute_tasks_64<false, true>(
+ kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
+ int *USE_ITT_BUILD_ARG(void *), kmp_int32);
+
+template int __kmp_atomic_execute_tasks_64<true, false>(
+ kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
+ int *USE_ITT_BUILD_ARG(void *), kmp_int32);
+
// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
// next barrier so they can assist in executing enqueued tasks.
// First thread in allocates the task team atomically.
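
The new __kmp_atomic_execute_tasks_64 template is defined only in this
translation unit, so the specializations other files link against are emitted
with the explicit instantiations added above. The same pattern in isolation,
with hypothetical names unrelated to the runtime's types:

    template <bool C, bool S> struct flag64 {
      bool value = C;
      bool sleeping = S;
    };
    // Definition lives in one .cpp file only.
    template <bool C, bool S> int execute_tasks(flag64<C, S> *flag) {
      return flag && flag->value;
    }
    // Emit exactly the specializations that other translation units use.
    template int execute_tasks<false, true>(flag64<false, true> *);
    template int execute_tasks<true, false>(flag64<true, false> *);
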
@@ -3162,7 +3201,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
// tasks and execute them. In extra barrier mode, tasks do not sleep
// at the separate tasking barrier, so this isn't a problem.
for (i = 0; i < nthreads; i++) {
- volatile void *sleep_loc;
+ void *sleep_loc;
kmp_info_t *thread = threads_data[i].td.td_thr;
if (i == this_thr->th.th_info.ds.ds_tid) {
@@ -3179,7 +3218,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),
__kmp_gtid_from_thread(thread)));
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+ __kmp_null_resume_wrapper(thread);
} else {
KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),
@@ -3451,6 +3490,7 @@ static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
TCW_4(task_team->tt.tt_found_tasks, FALSE);
TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+ TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
@@ -3512,9 +3552,11 @@ void __kmp_reap_task_teams(void) {
void __kmp_wait_to_unref_task_teams(void) {
kmp_info_t *thread;
kmp_uint32 spins;
+ kmp_uint64 time;
int done;
KMP_INIT_YIELD(spins);
+ KMP_INIT_BACKOFF(time);
for (;;) {
done = TRUE;
@@ -3547,7 +3589,7 @@ void __kmp_wait_to_unref_task_teams(void) {
__kmp_gtid_from_thread(thread)));
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- volatile void *sleep_loc;
+ void *sleep_loc;
// If the thread is sleeping, awaken it.
if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
NULL) {
@@ -3555,7 +3597,7 @@ void __kmp_wait_to_unref_task_teams(void) {
10,
("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+ __kmp_null_resume_wrapper(thread);
}
}
}
@@ -3564,7 +3606,7 @@ void __kmp_wait_to_unref_task_teams(void) {
}
// If oversubscribed or have waited a bit, yield.
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
}
}
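
KMP_INIT_BACKOFF and the extra argument to KMP_YIELD_OVERSUB_ELSE_SPIN add a
time component to the spin/yield decision. The general shape of such a wait
loop, as a hedged sketch rather than the runtime's macro implementation:

    #include <thread>
    // Spin cheaply for a bounded budget, then start yielding to the OS
    // scheduler (hypothetical numbers; the runtime tunes these itself).
    template <class Pred> void wait_with_backoff(Pred done) {
      int spins = 4096;
      while (!done()) {
        if (spins > 0)
          --spins;                   // busy-wait phase
        else
          std::this_thread::yield(); // oversubscribed or waited long enough
      }
    }
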
@@ -3613,6 +3655,7 @@ void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
TCW_4(task_team->tt.tt_found_tasks, FALSE);
TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+ TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
team->t.t_nproc);
TCW_4(task_team->tt.tt_active, TRUE);
@@ -3705,8 +3748,10 @@ void __kmp_task_team_wait(
"setting active to false, setting local and team's pointer to NULL\n",
__kmp_gtid_from_thread(this_thr), task_team));
KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
- task_team->tt.tt_found_proxy_tasks == TRUE);
+ task_team->tt.tt_found_proxy_tasks == TRUE ||
+ task_team->tt.tt_hidden_helper_task_encountered == TRUE);
TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+ TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
TCW_SYNC_4(task_team->tt.tt_active, FALSE);
KMP_MB();
@@ -3869,11 +3914,12 @@ static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
}
static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
+#if KMP_DEBUG
kmp_int32 children = 0;
-
// Predecrement simulated by "- 1" calculation
- children =
- KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
+ children = -1 +
+#endif
+ KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
KMP_DEBUG_ASSERT(children >= 0);
// Remove the imaginary children
@@ -3936,7 +3982,7 @@ void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
// This should be similar to start_k = __kmp_get_random( thread ) % nthreads
// but we cannot use __kmp_get_random here
- kmp_int32 start_k = start;
+ kmp_int32 start_k = start % nthreads;
kmp_int32 pass = 1;
kmp_int32 k = start_k;