author     mikhnenko <[email protected]>  2025-07-15 20:05:43 +0300
committer  mikhnenko <[email protected]>  2025-07-15 20:52:16 +0300
commit     a40bd4f45bbc18fd95b1596e655b8942ceb2cf4b (patch)
tree       bce599ca02c778c277198de6d131d37db71997d0 /contrib/libs/cxxsupp/openmp/kmp_runtime.cpp
parent     728e0eaef4dc1f1152d2c3a4cc1bbdf597f3ef3d (diff)
Update contrib/libs/cxxsupp/openmp to 20.1.7
commit_hash:722dd5fe79203d22ad4a0be288ac0caeb6b3dd68
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/kmp_runtime.cpp')
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_runtime.cpp  1848
1 file changed, 1034 insertions(+), 814 deletions(-)
diff --git a/contrib/libs/cxxsupp/openmp/kmp_runtime.cpp b/contrib/libs/cxxsupp/openmp/kmp_runtime.cpp
index bfbff03bd62..c26992ab98b 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_runtime.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_runtime.cpp
@@ -24,6 +24,7 @@
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
+#include "kmp_utils.h"
#if KMP_USE_HIER_SCHED
#error #include "kmp_dispatch_hier.h"
#endif
@@ -47,8 +48,9 @@ static char *ProfileTraceFile = nullptr;
#include <process.h>
#endif
-#if KMP_OS_WINDOWS
-// windows does not need include files as it doesn't use shared memory
+#ifndef KMP_USE_SHM
+// Windows and WASI do not need these include files as they don't use shared
+// memory.
#else
#include <sys/mman.h>
#include <sys/stat.h>
@@ -111,6 +113,21 @@ void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
+static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
+ int level) {
+ kmp_nested_nthreads_t *new_nested_nth =
+ (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
+ sizeof(kmp_nested_nthreads_t));
+ int new_size = level + thr->th.th_set_nested_nth_sz;
+ new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
+ for (int i = 0; i < level + 1; ++i)
+ new_nested_nth->nth[i] = 0;
+ for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
+ new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
+ new_nested_nth->size = new_nested_nth->used = new_size;
+ return new_nested_nth;
+}
+
/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
@@ -178,7 +195,12 @@ int __kmp_get_global_thread_id() {
if (stack_diff <= stack_size) {
/* The only way we can be closer than the allocated */
/* stack size is if we are running on this thread. */
- KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
+ // __kmp_gtid_get_specific can return negative value because this
+ // function can be called by thread destructor. However, before the
+ // thread destructor is called, the value of the corresponding
+ // thread-specific data will be reset to NULL.
+ KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
+ __kmp_gtid_get_specific() == i);
return i;
}
}
@@ -196,6 +218,12 @@ int __kmp_get_global_thread_id() {
if (i < 0)
return i;
+ // other_threads[i] can be nullptr at this point because the corresponding
+ // thread could have already been destructed. It can happen when this function
+ // is called in end library routine.
+ if (!TCR_SYNC_PTR(other_threads[i]))
+ return i;
+
/* dynamically updated stack window for uber threads to avoid get_specific
call */
if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
@@ -405,6 +433,8 @@ void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
}
#endif /* KMP_PRINT_DATA_PLACEMENT */
__kmp_release_bootstrap_lock(&__kmp_stdio_lock);
+
+ va_end(ap);
}
void __kmp_warn(char const *format, ...) {
@@ -433,26 +463,26 @@ void __kmp_abort_process() {
__kmp_dump_debug_buffer();
}
- if (KMP_OS_WINDOWS) {
- // Let other threads know of abnormal termination and prevent deadlock
- // if abort happened during library initialization or shutdown
- __kmp_global.g.g_abort = SIGABRT;
-
- /* On Windows* OS by default abort() causes pop-up error box, which stalls
- nightly testing. Unfortunately, we cannot reliably suppress pop-up error
- boxes. _set_abort_behavior() works well, but this function is not
- available in VS7 (this is not problem for DLL, but it is a problem for
- static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
- help, at least in some versions of MS C RTL.
-
- It seems following sequence is the only way to simulate abort() and
- avoid pop-up error box. */
- raise(SIGABRT);
- _exit(3); // Just in case, if signal ignored, exit anyway.
- } else {
- __kmp_unregister_library();
- abort();
- }
+#if KMP_OS_WINDOWS
+ // Let other threads know of abnormal termination and prevent deadlock
+ // if abort happened during library initialization or shutdown
+ __kmp_global.g.g_abort = SIGABRT;
+
+ /* On Windows* OS by default abort() causes pop-up error box, which stalls
+ nightly testing. Unfortunately, we cannot reliably suppress pop-up error
+ boxes. _set_abort_behavior() works well, but this function is not
+ available in VS7 (this is not problem for DLL, but it is a problem for
+ static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
+ help, at least in some versions of MS C RTL.
+
+ It seems following sequence is the only way to simulate abort() and
+ avoid pop-up error box. */
+ raise(SIGABRT);
+ _exit(3); // Just in case, if signal ignored, exit anyway.
+#else
+ __kmp_unregister_library();
+ abort();
+#endif
__kmp_infinite_loop();
__kmp_release_bootstrap_lock(&__kmp_exit_lock);
@@ -553,6 +583,14 @@ static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
/* ------------------------------------------------------------------------ */
+#if ENABLE_LIBOMPTARGET
+static void __kmp_init_omptarget() {
+ __kmp_init_target_task();
+}
+#endif
+
+/* ------------------------------------------------------------------------ */
+
#if KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS
@@ -907,6 +945,11 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
__kmp_get_gtid(), new_nthreads, set_nthreads));
}
#endif // KMP_DEBUG
+
+ if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
+ __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
+ this_thr->th.th_nt_msg);
+ }
return new_nthreads;
}
@@ -1011,6 +1054,47 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
__kmp_partition_places(team);
}
#endif
+
+ if (team->t.t_nproc > 1 &&
+ __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ team->t.b->update_num_threads(team->t.t_nproc);
+ __kmp_add_threads_to_team(team, team->t.t_nproc);
+ }
+ }
+
+ // Take care of primary thread's task state
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ if (use_hot_team) {
+ KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
+ KA_TRACE(
+ 20,
+ ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
+ "%p, new task_team %p / team %p\n",
+ __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
+ team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
+ team));
+
+ // Store primary thread's current task state on new team
+ KMP_CHECK_UPDATE(team->t.t_primary_task_state,
+ master_th->th.th_task_state);
+
+ // Restore primary thread's task state to hot team's state
+ // by using thread 1's task state
+ if (team->t.t_nproc > 1) {
+ KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
+ team->t.t_threads[1]->th.th_task_state == 1);
+ KMP_CHECK_UPDATE(master_th->th.th_task_state,
+ team->t.t_threads[1]->th.th_task_state);
+ } else {
+ master_th->th.th_task_state = 0;
+ }
+ } else {
+ // Store primary thread's current task_state on new team
+ KMP_CHECK_UPDATE(team->t.t_primary_task_state,
+ master_th->th.th_task_state);
+ // Are not using hot team, so set task state to 0.
+ master_th->th.th_task_state = 0;
+ }
}
if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
@@ -1116,18 +1200,6 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
KMP_DEBUG_ASSERT(serial_team);
KMP_MB();
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- KMP_DEBUG_ASSERT(
- this_thr->th.th_task_team ==
- this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
- KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
- NULL);
- KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
- "team %p, new task_team = NULL\n",
- global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
- this_thr->th.th_task_team = NULL;
- }
-
kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
proc_bind = proc_bind_false;
@@ -1139,6 +1211,9 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// Reset for next parallel region
this_thr->th.th_set_proc_bind = proc_bind_default;
+ // Reset num_threads for next parallel region
+ this_thr->th.th_set_nproc = 0;
+
#if OMPT_SUPPORT
ompt_data_t ompt_parallel_data = ompt_data_none;
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
@@ -1210,6 +1285,12 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
serial_team->t.t_serialized = 1;
serial_team->t.t_nproc = 1;
serial_team->t.t_parent = this_thr->th.th_team;
+ if (this_thr->th.th_team->t.t_nested_nth)
+ serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
+ else
+ serial_team->t.t_nested_nth = &__kmp_nested_nth;
+ // Save previous team's task state on serial team structure
+ serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
this_thr->th.th_team = serial_team;
serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
@@ -1229,9 +1310,11 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// Thread value exists in the nested nthreads array for the next nested
// level
- if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
- this_thr->th.th_current_task->td_icvs.nproc =
- __kmp_nested_nth.nth[level + 1];
+ kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
+ if (this_thr->th.th_team->t.t_nested_nth)
+ nested_nth = this_thr->th.th_team->t.t_nested_nth;
+ if (nested_nth->used && (level + 1 < nested_nth->used)) {
+ this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
}
if (__kmp_nested_proc_bind.used &&
@@ -1249,6 +1332,8 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_team_nproc = 1;
this_thr->th.th_team_master = this_thr;
this_thr->th.th_team_serialized = 1;
+ this_thr->th.th_task_team = NULL;
+ this_thr->th.th_task_state = 0;
serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
@@ -1280,10 +1365,14 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
int level = this_thr->th.th_team->t.t_level;
// Thread value exists in the nested nthreads array for the next nested
// level
- if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
- this_thr->th.th_current_task->td_icvs.nproc =
- __kmp_nested_nth.nth[level + 1];
+
+ kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
+ if (serial_team->t.t_nested_nth)
+ nested_nth = serial_team->t.t_nested_nth;
+ if (nested_nth->used && (level + 1 < nested_nth->used)) {
+ this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
}
+
serial_team->t.t_level++;
KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n",
@@ -1300,6 +1389,9 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
}
this_thr->th.th_dispatch = serial_team->t.t_dispatch;
+ /* allocate/push task team stack */
+ __kmp_push_task_team_node(this_thr, serial_team);
+
KMP_MB();
}
KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
@@ -1350,6 +1442,486 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
#endif
}
+// Test if this fork is for a team closely nested in a teams construct
+static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
+ microtask_t microtask, int level,
+ int teams_level, kmp_va_list ap) {
+ return (master_th->th.th_teams_microtask && ap &&
+ microtask != (microtask_t)__kmp_teams_master && level == teams_level);
+}
+
+// Test if this fork is for the teams construct, i.e. to form the outer league
+// of teams
+static inline bool __kmp_is_entering_teams(int active_level, int level,
+ int teams_level, kmp_va_list ap) {
+ return ((ap == NULL && active_level == 0) ||
+ (ap && teams_level > 0 && teams_level == level));
+}
+
+// AC: This is start of parallel that is nested inside teams construct.
+// The team is actual (hot), all workers are ready at the fork barrier.
+// No lock needed to initialize the team a bit, then free workers.
+static inline int
+__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
+ kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
+ enum fork_context_e call_context, microtask_t microtask,
+ launch_t invoker, int master_set_numthreads, int level,
+#if OMPT_SUPPORT
+ ompt_data_t ompt_parallel_data, void *return_address,
+#endif
+ kmp_va_list ap) {
+ void **argv;
+ int i;
+
+ parent_team->t.t_ident = loc;
+ __kmp_alloc_argv_entries(argc, parent_team, TRUE);
+ parent_team->t.t_argc = argc;
+ argv = (void **)parent_team->t.t_argv;
+ for (i = argc - 1; i >= 0; --i) {
+ *argv++ = va_arg(kmp_va_deref(ap), void *);
+ }
+ // Increment our nested depth levels, but not increase the serialization
+ if (parent_team == master_th->th.th_serial_team) {
+ // AC: we are in serialized parallel
+ __kmpc_serialized_parallel(loc, gtid);
+ KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
+
+ if (call_context == fork_context_gnu) {
+ // AC: need to decrement t_serialized for enquiry functions to work
+ // correctly, will restore at join time
+ parent_team->t.t_serialized--;
+ return TRUE;
+ }
+
+#if OMPD_SUPPORT
+ parent_team->t.t_pkfn = microtask;
+#endif
+
+#if OMPT_SUPPORT
+ void *dummy;
+ void **exit_frame_p;
+ ompt_data_t *implicit_task_data;
+ ompt_lw_taskteam_t lw_taskteam;
+
+ if (ompt_enabled.enabled) {
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+ &ompt_parallel_data, return_address);
+ exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
+
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+ // Don't use lw_taskteam after linking. Content was swapped.
+
+ /* OMPT implicit task begin */
+ implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
+ 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
+ }
+
+ /* OMPT state */
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ } else {
+ exit_frame_p = &dummy;
+ }
+#endif
+
+ // AC: need to decrement t_serialized for enquiry functions to work
+ // correctly, will restore at join time
+ parent_team->t.t_serialized--;
+
+ {
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
+ __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
+#if OMPT_SUPPORT
+ ,
+ exit_frame_p
+#endif
+ );
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ *exit_frame_p = NULL;
+ OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, implicit_task_data, 1,
+ OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
+ }
+ ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
+ __ompt_lw_taskteam_unlink(master_th);
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
+ OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
+ }
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+ return TRUE;
+ }
+
+ parent_team->t.t_pkfn = microtask;
+ parent_team->t.t_invoke = invoker;
+ KMP_ATOMIC_INC(&root->r.r_in_parallel);
+ parent_team->t.t_active_level++;
+ parent_team->t.t_level++;
+ parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
+
+ // If the threads allocated to the team are less than the thread limit, update
+ // the thread limit here. th_teams_size.nth is specific to this team nested
+ // in a teams construct, the team is fully created, and we're about to do
+ // the actual fork. Best to do this here so that the subsequent uses below
+ // and in the join have the correct value.
+ master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
+
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ ompt_lw_taskteam_t lw_taskteam;
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
+ return_address);
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
+ }
+#endif
+
+ /* Change number of threads in the team if requested */
+ if (master_set_numthreads) { // The parallel has num_threads clause
+ if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
+ // AC: only can reduce number of threads dynamically, can't increase
+ kmp_info_t **other_threads = parent_team->t.t_threads;
+ // NOTE: if using distributed barrier, we need to run this code block
+ // even when the team size appears not to have changed from the max.
+ int old_proc = master_th->th.th_teams_size.nth;
+ if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
+ __kmp_add_threads_to_team(parent_team, master_set_numthreads);
+ }
+ parent_team->t.t_nproc = master_set_numthreads;
+ for (i = 0; i < master_set_numthreads; ++i) {
+ other_threads[i]->th.th_team_nproc = master_set_numthreads;
+ }
+ }
+ // Keep extra threads hot in the team for possible next parallels
+ master_th->th.th_set_nproc = 0;
+ }
+
+#if USE_DEBUGGER
+ if (__kmp_debugging) { // Let debugger override number of threads.
+ int nth = __kmp_omp_num_threads(loc);
+ if (nth > 0) { // 0 means debugger doesn't want to change num threads
+ master_set_numthreads = nth;
+ }
+ }
+#endif
+
+ // Figure out the proc_bind policy for the nested parallel within teams
+ kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
+ // proc_bind_default means don't update
+ kmp_proc_bind_t proc_bind_icv = proc_bind_default;
+ if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
+ proc_bind = proc_bind_false;
+ } else {
+ // No proc_bind clause specified; use current proc-bind-var
+ if (proc_bind == proc_bind_default) {
+ proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
+ }
+ /* else: The proc_bind policy was specified explicitly on parallel clause.
+ This overrides proc-bind-var for this parallel region, but does not
+ change proc-bind-var. */
+ // Figure the value of proc-bind-var for the child threads.
+ if ((level + 1 < __kmp_nested_proc_bind.used) &&
+ (__kmp_nested_proc_bind.bind_types[level + 1] !=
+ master_th->th.th_current_task->td_icvs.proc_bind)) {
+ proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
+ }
+ }
+ KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
+ // Need to change the bind-var ICV to correct value for each implicit task
+ if (proc_bind_icv != proc_bind_default &&
+ master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
+ kmp_info_t **other_threads = parent_team->t.t_threads;
+ for (i = 0; i < master_th->th.th_team_nproc; ++i) {
+ other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
+ }
+ }
+ // Reset for next parallel region
+ master_th->th.th_set_proc_bind = proc_bind_default;
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
+ KMP_ITT_DEBUG) &&
+ __kmp_forkjoin_frames_mode == 3 &&
+ parent_team->t.t_active_level == 1 // only report frames at level 1
+ && master_th->th.th_teams_size.nteams == 1) {
+ kmp_uint64 tmp_time = __itt_get_timestamp();
+ master_th->th.th_frame_time = tmp_time;
+ parent_team->t.t_region_time = tmp_time;
+ }
+ if (__itt_stack_caller_create_ptr) {
+ KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
+ // create new stack stitching id before entering fork barrier
+ parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
+ }
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+#if KMP_AFFINITY_SUPPORTED
+ __kmp_partition_places(parent_team);
+#endif
+
+ KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
+ "master_th=%p, gtid=%d\n",
+ root, parent_team, master_th, gtid));
+ __kmp_internal_fork(loc, gtid, parent_team);
+ KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
+ "master_th=%p, gtid=%d\n",
+ root, parent_team, master_th, gtid));
+
+ if (call_context == fork_context_gnu)
+ return TRUE;
+
+ /* Invoke microtask for PRIMARY thread */
+ KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
+ parent_team->t.t_id, parent_team->t.t_pkfn));
+
+ if (!parent_team->t.t_invoke(gtid)) {
+ KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
+ }
+ KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
+ parent_team->t.t_id, parent_team->t.t_pkfn));
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
+
+ return TRUE;
+}
+
+// Create a serialized parallel region
+static inline int
+__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
+ kmp_int32 argc, microtask_t microtask, launch_t invoker,
+ kmp_info_t *master_th, kmp_team_t *parent_team,
+#if OMPT_SUPPORT
+ ompt_data_t *ompt_parallel_data, void **return_address,
+ ompt_data_t **parent_task_data,
+#endif
+ kmp_va_list ap) {
+ kmp_team_t *team;
+ int i;
+ void **argv;
+
+/* josh todo: hypothetical question: what do we do for OS X*? */
+#if KMP_OS_LINUX && \
+ (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ SimpleVLA<void *> args(argc);
+#else
+ void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
+#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
+ KMP_ARCH_AARCH64) */
+
+ KA_TRACE(
+ 20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
+
+ __kmpc_serialized_parallel(loc, gtid);
+
+#if OMPD_SUPPORT
+ master_th->th.th_serial_team->t.t_pkfn = microtask;
+#endif
+
+ if (call_context == fork_context_intel) {
+ /* TODO this sucks, use the compiler itself to pass args! :) */
+ master_th->th.th_serial_team->t.t_ident = loc;
+ if (!ap) {
+ // revert change made in __kmpc_serialized_parallel()
+ master_th->th.th_serial_team->t.t_level--;
+// Get args from parent team for teams construct
+
+#if OMPT_SUPPORT
+ void *dummy;
+ void **exit_frame_p;
+ ompt_task_info_t *task_info;
+ ompt_lw_taskteam_t lw_taskteam;
+
+ if (ompt_enabled.enabled) {
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+ ompt_parallel_data, *return_address);
+
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+ // don't use lw_taskteam after linking. content was swaped
+ task_info = OMPT_CUR_TASK_INFO(master_th);
+ exit_frame_p = &(task_info->frame.exit_frame.ptr);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+ &(task_info->task_data), 1,
+ OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
+ }
+
+ /* OMPT state */
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ } else {
+ exit_frame_p = &dummy;
+ }
+#endif
+
+ {
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
+ __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
+#if OMPT_SUPPORT
+ ,
+ exit_frame_p
+#endif
+ );
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ *exit_frame_p = NULL;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), 1,
+ OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
+ }
+ *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
+ __ompt_lw_taskteam_unlink(master_th);
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ ompt_parallel_data, *parent_task_data,
+ OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
+ }
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+ } else if (microtask == (microtask_t)__kmp_teams_master) {
+ KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
+ team = master_th->th.th_team;
+ // team->t.t_pkfn = microtask;
+ team->t.t_invoke = invoker;
+ __kmp_alloc_argv_entries(argc, team, TRUE);
+ team->t.t_argc = argc;
+ argv = (void **)team->t.t_argv;
+ for (i = argc - 1; i >= 0; --i)
+ *argv++ = va_arg(kmp_va_deref(ap), void *);
+ // AC: revert change made in __kmpc_serialized_parallel()
+ // because initial code in teams should have level=0
+ team->t.t_level--;
+ // AC: call special invoker for outer "parallel" of teams construct
+ invoker(gtid);
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), 0,
+ OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
+ }
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ ompt_parallel_data, *parent_task_data,
+ OMPT_INVOKER(call_context) | ompt_parallel_league,
+ *return_address);
+ }
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+ } else {
+ argv = args;
+ for (i = argc - 1; i >= 0; --i)
+ *argv++ = va_arg(kmp_va_deref(ap), void *);
+ KMP_MB();
+
+#if OMPT_SUPPORT
+ void *dummy;
+ void **exit_frame_p;
+ ompt_task_info_t *task_info;
+ ompt_lw_taskteam_t lw_taskteam;
+ ompt_data_t *implicit_task_data;
+
+ if (ompt_enabled.enabled) {
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+ ompt_parallel_data, *return_address);
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+ // don't use lw_taskteam after linking. content was swaped
+ task_info = OMPT_CUR_TASK_INFO(master_th);
+ exit_frame_p = &(task_info->frame.exit_frame.ptr);
+
+ /* OMPT implicit task begin */
+ implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+ implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
+ ompt_task_implicit);
+ OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
+ }
+
+ /* OMPT state */
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ } else {
+ exit_frame_p = &dummy;
+ }
+#endif
+
+ {
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
+ __kmp_invoke_microtask(microtask, gtid, 0, argc, args
+#if OMPT_SUPPORT
+ ,
+ exit_frame_p
+#endif
+ );
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ *exit_frame_p = NULL;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), 1,
+ OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
+ }
+
+ *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
+ __ompt_lw_taskteam_unlink(master_th);
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ ompt_parallel_data, *parent_task_data,
+ OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
+ }
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+ }
+ } else if (call_context == fork_context_gnu) {
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ ompt_lw_taskteam_t lwt;
+ __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
+ *return_address);
+
+ lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
+ __ompt_lw_taskteam_link(&lwt, master_th, 1);
+ }
+// don't use lw_taskteam after linking. content was swaped
+#endif
+
+ // we were called from GNU native code
+ KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
+ return FALSE;
+ } else {
+ KMP_ASSERT2(call_context < fork_context_last,
+ "__kmp_serial_fork_call: unknown fork_context parameter");
+ }
+
+ KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
+ KMP_MB();
+ return FALSE;
+}
+
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
@@ -1367,6 +1939,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
int nthreads;
int master_active;
int master_set_numthreads;
+ int task_thread_limit = 0;
int level;
int active_level;
int teams_level;
@@ -1395,20 +1968,23 @@ int __kmp_fork_call(ident_t *loc, int gtid,
__kmp_resume_if_soft_paused();
/* setup current data */
- master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
- // shutdown
+ // AC: potentially unsafe, not in sync with library shutdown,
+ // __kmp_threads can be freed
+ master_th = __kmp_threads[gtid];
+
parent_team = master_th->th.th_team;
master_tid = master_th->th.th_info.ds.ds_tid;
master_this_cons = master_th->th.th_local.this_construct;
root = master_th->th.th_root;
master_active = root->r.r_active;
master_set_numthreads = master_th->th.th_set_nproc;
+ task_thread_limit =
+ master_th->th.th_current_task->td_icvs.task_thread_limit;
#if OMPT_SUPPORT
ompt_data_t ompt_parallel_data = ompt_data_none;
- ompt_data_t *parent_task_data;
- ompt_frame_t *ompt_frame;
- ompt_data_t *implicit_task_data;
+ ompt_data_t *parent_task_data = NULL;
+ ompt_frame_t *ompt_frame = NULL;
void *return_address = NULL;
if (ompt_enabled.enabled) {
@@ -1458,267 +2034,44 @@ int __kmp_fork_call(ident_t *loc, int gtid,
master_th->th.th_ident = loc;
- if (master_th->th.th_teams_microtask && ap &&
- microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
- // AC: This is start of parallel that is nested inside teams construct.
- // The team is actual (hot), all workers are ready at the fork barrier.
- // No lock needed to initialize the team a bit, then free workers.
- parent_team->t.t_ident = loc;
- __kmp_alloc_argv_entries(argc, parent_team, TRUE);
- parent_team->t.t_argc = argc;
- argv = (void **)parent_team->t.t_argv;
- for (i = argc - 1; i >= 0; --i)
- *argv++ = va_arg(kmp_va_deref(ap), void *);
- // Increment our nested depth levels, but not increase the serialization
- if (parent_team == master_th->th.th_serial_team) {
- // AC: we are in serialized parallel
- __kmpc_serialized_parallel(loc, gtid);
- KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
-
- if (call_context == fork_context_gnu) {
- // AC: need to decrement t_serialized for enquiry functions to work
- // correctly, will restore at join time
- parent_team->t.t_serialized--;
- return TRUE;
- }
-
-#if OMPD_SUPPORT
- parent_team->t.t_pkfn = microtask;
-#endif
-
+ // Parallel closely nested in teams construct:
+ if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
+ return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
+ call_context, microtask, invoker,
+ master_set_numthreads, level,
#if OMPT_SUPPORT
- void *dummy;
- void **exit_frame_p;
-
- ompt_lw_taskteam_t lw_taskteam;
-
- if (ompt_enabled.enabled) {
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- &ompt_parallel_data, return_address);
- exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
-
- __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
- // don't use lw_taskteam after linking. content was swaped
-
- /* OMPT implicit task begin */
- implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
- if (ompt_enabled.ompt_callback_implicit_task) {
- OMPT_CUR_TASK_INFO(master_th)->thread_num =
- __kmp_tid_from_gtid(gtid);
- ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
- ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
- implicit_task_data, 1,
- OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
- }
-
- /* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
- } else {
- exit_frame_p = &dummy;
- }
-#endif
- // AC: need to decrement t_serialized for enquiry functions to work
- // correctly, will restore at join time
- parent_team->t.t_serialized--;
-
- {
- KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
- KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
-#if OMPT_SUPPORT
- ,
- exit_frame_p
-#endif
- );
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- *exit_frame_p = NULL;
- OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
- if (ompt_enabled.ompt_callback_implicit_task) {
- ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
- ompt_scope_end, NULL, implicit_task_data, 1,
- OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
- }
- ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
- __ompt_lw_taskteam_unlink(master_th);
- if (ompt_enabled.ompt_callback_parallel_end) {
- ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
- &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
- OMPT_INVOKER(call_context) | ompt_parallel_team,
- return_address);
- }
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
- return TRUE;
- }
-
- parent_team->t.t_pkfn = microtask;
- parent_team->t.t_invoke = invoker;
- KMP_ATOMIC_INC(&root->r.r_in_parallel);
- parent_team->t.t_active_level++;
- parent_team->t.t_level++;
- parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
-
-#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- ompt_lw_taskteam_t lw_taskteam;
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- &ompt_parallel_data, return_address);
- __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
- }
-#endif
-
- /* Change number of threads in the team if requested */
- if (master_set_numthreads) { // The parallel has num_threads clause
- if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
- // AC: only can reduce number of threads dynamically, can't increase
- kmp_info_t **other_threads = parent_team->t.t_threads;
- // NOTE: if using distributed barrier, we need to run this code block
- // even when the team size appears not to have changed from the max.
- int old_proc = master_th->th.th_teams_size.nth;
- if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
- bp_dist_bar) {
- __kmp_resize_dist_barrier(parent_team, old_proc,
- master_set_numthreads);
- __kmp_add_threads_to_team(parent_team, master_set_numthreads);
- }
- parent_team->t.t_nproc = master_set_numthreads;
- for (i = 0; i < master_set_numthreads; ++i) {
- other_threads[i]->th.th_team_nproc = master_set_numthreads;
- }
- }
- // Keep extra threads hot in the team for possible next parallels
- master_th->th.th_set_nproc = 0;
- }
-
-#if USE_DEBUGGER
- if (__kmp_debugging) { // Let debugger override number of threads.
- int nth = __kmp_omp_num_threads(loc);
- if (nth > 0) { // 0 means debugger doesn't want to change num threads
- master_set_numthreads = nth;
- }
- }
-#endif
-
- // Figure out the proc_bind policy for the nested parallel within teams
- kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
- // proc_bind_default means don't update
- kmp_proc_bind_t proc_bind_icv = proc_bind_default;
- if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
- proc_bind = proc_bind_false;
- } else {
- // No proc_bind clause specified; use current proc-bind-var
- if (proc_bind == proc_bind_default) {
- proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
- }
- /* else: The proc_bind policy was specified explicitly on parallel
- clause.
- This overrides proc-bind-var for this parallel region, but does not
- change proc-bind-var. */
- // Figure the value of proc-bind-var for the child threads.
- if ((level + 1 < __kmp_nested_proc_bind.used) &&
- (__kmp_nested_proc_bind.bind_types[level + 1] !=
- master_th->th.th_current_task->td_icvs.proc_bind)) {
- proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
- }
- }
- KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
- // Need to change the bind-var ICV to correct value for each implicit task
- if (proc_bind_icv != proc_bind_default &&
- master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
- kmp_info_t **other_threads = parent_team->t.t_threads;
- for (i = 0; i < master_th->th.th_team_nproc; ++i) {
- other_threads[i]->th.th_current_task->td_icvs.proc_bind =
- proc_bind_icv;
- }
- }
- // Reset for next parallel region
- master_th->th.th_set_proc_bind = proc_bind_default;
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
- KMP_ITT_DEBUG) &&
- __kmp_forkjoin_frames_mode == 3 &&
- parent_team->t.t_active_level == 1 // only report frames at level 1
- && master_th->th.th_teams_size.nteams == 1) {
- kmp_uint64 tmp_time = __itt_get_timestamp();
- master_th->th.th_frame_time = tmp_time;
- parent_team->t.t_region_time = tmp_time;
- }
- if (__itt_stack_caller_create_ptr) {
- KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
- // create new stack stitching id before entering fork barrier
- parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
-#if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places(parent_team);
-#endif
-
- KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
- "master_th=%p, gtid=%d\n",
- root, parent_team, master_th, gtid));
- __kmp_internal_fork(loc, gtid, parent_team);
- KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
- "master_th=%p, gtid=%d\n",
- root, parent_team, master_th, gtid));
-
- if (call_context == fork_context_gnu)
- return TRUE;
-
- /* Invoke microtask for PRIMARY thread */
- KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
- parent_team->t.t_id, parent_team->t.t_pkfn));
-
- if (!parent_team->t.t_invoke(gtid)) {
- KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
- }
- KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
- parent_team->t.t_id, parent_team->t.t_pkfn));
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
-
- return TRUE;
- } // Parallel closely nested in teams construct
-
-#if KMP_DEBUG
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
- parent_team->t.t_task_team[master_th->th.th_task_state]);
- }
+ ompt_parallel_data, return_address,
#endif
+ ap);
+ } // End parallel closely nested in teams construct
// Need this to happen before we determine the number of threads, not while
// we are allocating the team
//__kmp_push_current_task_to_thread(master_th, parent_team, 0);
- int enter_teams = 0;
- if (parent_team->t.t_active_level >=
- master_th->th.th_current_task->td_icvs.max_active_levels) {
+
+ KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);
+
+ // Determine the number of threads
+ int enter_teams =
+ __kmp_is_entering_teams(active_level, level, teams_level, ap);
+ if ((!enter_teams &&
+ (parent_team->t.t_active_level >=
+ master_th->th.th_current_task->td_icvs.max_active_levels)) ||
+ (__kmp_library == library_serial)) {
+ KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
nthreads = 1;
} else {
- enter_teams = ((ap == NULL && active_level == 0) ||
- (ap && teams_level > 0 && teams_level == level));
nthreads = master_set_numthreads
? master_set_numthreads
// TODO: get nproc directly from current task
: get__nproc_2(parent_team, master_tid);
+ // Use the thread_limit set for the current target task if exists, else go
+ // with the deduced nthreads
+ nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
+ ? task_thread_limit
+ : nthreads;
// Check if we need to take forkjoin lock? (no need for serialized
- // parallel out of teams construct). This code moved here from
- // __kmp_reserve_threads() to speedup nested serialized parallels.
- if (nthreads > 1) {
- if ((get__max_active_levels(master_th) == 1 &&
- (root->r.r_in_parallel && !enter_teams)) ||
- (__kmp_library == library_serial)) {
- KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
- " threads\n",
- gtid, nthreads));
- nthreads = 1;
- }
- }
+ // parallel out of teams construct).
if (nthreads > 1) {
/* determine how many new threads we can use */
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -1741,232 +2094,14 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// If we temporarily changed the set number of threads then restore it now
master_th->th.th_set_nproc = 0;
- /* create a serialized parallel region? */
if (nthreads == 1) {
-/* josh todo: hypothetical question: what do we do for OS X*? */
-#if KMP_OS_LINUX && \
- (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- void *args[argc];
-#else
- void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
-#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
- KMP_ARCH_AARCH64) */
-
- KA_TRACE(20,
- ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
-
- __kmpc_serialized_parallel(loc, gtid);
-
-#if OMPD_SUPPORT
- master_th->th.th_serial_team->t.t_pkfn = microtask;
-#endif
-
- if (call_context == fork_context_intel) {
- /* TODO this sucks, use the compiler itself to pass args! :) */
- master_th->th.th_serial_team->t.t_ident = loc;
- if (!ap) {
- // revert change made in __kmpc_serialized_parallel()
- master_th->th.th_serial_team->t.t_level--;
- // Get args from parent team for teams construct
-
-#if OMPT_SUPPORT
- void *dummy;
- void **exit_frame_p;
- ompt_task_info_t *task_info;
-
- ompt_lw_taskteam_t lw_taskteam;
-
- if (ompt_enabled.enabled) {
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- &ompt_parallel_data, return_address);
-
- __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
- // don't use lw_taskteam after linking. content was swaped
-
- task_info = OMPT_CUR_TASK_INFO(master_th);
- exit_frame_p = &(task_info->frame.exit_frame.ptr);
- if (ompt_enabled.ompt_callback_implicit_task) {
- OMPT_CUR_TASK_INFO(master_th)->thread_num =
- __kmp_tid_from_gtid(gtid);
- ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
- ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
- &(task_info->task_data), 1,
- OMPT_CUR_TASK_INFO(master_th)->thread_num,
- ompt_task_implicit);
- }
-
- /* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
- } else {
- exit_frame_p = &dummy;
- }
-#endif
-
- {
- KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
- KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- __kmp_invoke_microtask(microtask, gtid, 0, argc,
- parent_team->t.t_argv
-#if OMPT_SUPPORT
- ,
- exit_frame_p
-#endif
- );
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- *exit_frame_p = NULL;
- if (ompt_enabled.ompt_callback_implicit_task) {
- ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
- ompt_scope_end, NULL, &(task_info->task_data), 1,
- OMPT_CUR_TASK_INFO(master_th)->thread_num,
- ompt_task_implicit);
- }
- ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
- __ompt_lw_taskteam_unlink(master_th);
- if (ompt_enabled.ompt_callback_parallel_end) {
- ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
- &ompt_parallel_data, parent_task_data,
- OMPT_INVOKER(call_context) | ompt_parallel_team,
- return_address);
- }
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
- } else if (microtask == (microtask_t)__kmp_teams_master) {
- KMP_DEBUG_ASSERT(master_th->th.th_team ==
- master_th->th.th_serial_team);
- team = master_th->th.th_team;
- // team->t.t_pkfn = microtask;
- team->t.t_invoke = invoker;
- __kmp_alloc_argv_entries(argc, team, TRUE);
- team->t.t_argc = argc;
- argv = (void **)team->t.t_argv;
- if (ap) {
- for (i = argc - 1; i >= 0; --i)
- *argv++ = va_arg(kmp_va_deref(ap), void *);
- } else {
- for (i = 0; i < argc; ++i)
- // Get args from parent team for teams construct
- argv[i] = parent_team->t.t_argv[i];
- }
- // AC: revert change made in __kmpc_serialized_parallel()
- // because initial code in teams should have level=0
- team->t.t_level--;
- // AC: call special invoker for outer "parallel" of teams construct
- invoker(gtid);
-#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
- if (ompt_enabled.ompt_callback_implicit_task) {
- ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
- ompt_scope_end, NULL, &(task_info->task_data), 0,
- OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
- }
- if (ompt_enabled.ompt_callback_parallel_end) {
- ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
- &ompt_parallel_data, parent_task_data,
- OMPT_INVOKER(call_context) | ompt_parallel_league,
- return_address);
- }
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
- } else {
- argv = args;
- for (i = argc - 1; i >= 0; --i)
- *argv++ = va_arg(kmp_va_deref(ap), void *);
- KMP_MB();
-
-#if OMPT_SUPPORT
- void *dummy;
- void **exit_frame_p;
- ompt_task_info_t *task_info;
-
- ompt_lw_taskteam_t lw_taskteam;
-
- if (ompt_enabled.enabled) {
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- &ompt_parallel_data, return_address);
- __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
- // don't use lw_taskteam after linking. content was swaped
- task_info = OMPT_CUR_TASK_INFO(master_th);
- exit_frame_p = &(task_info->frame.exit_frame.ptr);
-
- /* OMPT implicit task begin */
- implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
- if (ompt_enabled.ompt_callback_implicit_task) {
- ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
- ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
- implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
- ompt_task_implicit);
- OMPT_CUR_TASK_INFO(master_th)->thread_num =
- __kmp_tid_from_gtid(gtid);
- }
-
- /* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
- } else {
- exit_frame_p = &dummy;
- }
-#endif
-
- {
- KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
- KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- __kmp_invoke_microtask(microtask, gtid, 0, argc, args
+ return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
+ invoker, master_th, parent_team,
#if OMPT_SUPPORT
- ,
- exit_frame_p
+ &ompt_parallel_data, &return_address,
+ &parent_task_data,
#endif
- );
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- *exit_frame_p = NULL;
- if (ompt_enabled.ompt_callback_implicit_task) {
- ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
- ompt_scope_end, NULL, &(task_info->task_data), 1,
- OMPT_CUR_TASK_INFO(master_th)->thread_num,
- ompt_task_implicit);
- }
-
- ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
- __ompt_lw_taskteam_unlink(master_th);
- if (ompt_enabled.ompt_callback_parallel_end) {
- ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
- &ompt_parallel_data, parent_task_data,
- OMPT_INVOKER(call_context) | ompt_parallel_team,
- return_address);
- }
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
- }
- } else if (call_context == fork_context_gnu) {
-#if OMPT_SUPPORT
- ompt_lw_taskteam_t lwt;
- __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
- return_address);
-
- lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
- __ompt_lw_taskteam_link(&lwt, master_th, 1);
-// don't use lw_taskteam after linking. content was swaped
-#endif
-
- // we were called from GNU native code
- KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
- return FALSE;
- } else {
- KMP_ASSERT2(call_context < fork_context_last,
- "__kmp_fork_call: unknown fork_context parameter");
- }
-
- KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
- KMP_MB();
- return FALSE;
+ ap);
} // if (nthreads == 1)
// GEH: only modify the executing flag in the case when not serialized
@@ -1988,9 +2123,18 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// See if we need to make a copy of the ICVs.
int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
- if ((level + 1 < __kmp_nested_nth.used) &&
- (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
- nthreads_icv = __kmp_nested_nth.nth[level + 1];
+ kmp_nested_nthreads_t *nested_nth = NULL;
+ if (!master_th->th.th_set_nested_nth &&
+ (level + 1 < parent_team->t.t_nested_nth->used) &&
+ (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
+ nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
+ } else if (master_th->th.th_set_nested_nth) {
+ nested_nth = __kmp_override_nested_nth(master_th, level);
+ if ((level + 1 < nested_nth->used) &&
+ (nested_nth->nth[level + 1] != nthreads_icv))
+ nthreads_icv = nested_nth->nth[level + 1];
+ else
+ nthreads_icv = 0; // don't update
} else {
nthreads_icv = 0; // don't update
}
@@ -2099,6 +2243,24 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
+ // Check if hot team has potentially outdated list, and if so, free it
+ if (team->t.t_nested_nth &&
+ team->t.t_nested_nth != parent_team->t.t_nested_nth) {
+ KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
+ KMP_INTERNAL_FREE(team->t.t_nested_nth);
+ team->t.t_nested_nth = NULL;
+ }
+ team->t.t_nested_nth = parent_team->t.t_nested_nth;
+ if (master_th->th.th_set_nested_nth) {
+ if (!nested_nth)
+ nested_nth = __kmp_override_nested_nth(master_th, level);
+ team->t.t_nested_nth = nested_nth;
+ KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
+ master_th->th.th_set_nested_nth = NULL;
+ master_th->th.th_set_nested_nth_sz = 0;
+ master_th->th.th_nt_strict = false;
+ }
+
// Update the floating point rounding in the team if required.
propagateFPControl(team);
#if OMPD_SUPPORT
@@ -2106,64 +2268,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
ompd_bp_parallel_begin();
#endif
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- // Set primary thread's task team to team's task team. Unless this is hot
- // team, it should be NULL.
- KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
- parent_team->t.t_task_team[master_th->th.th_task_state]);
- KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
- "%p, new task_team %p / team %p\n",
- __kmp_gtid_from_thread(master_th),
- master_th->th.th_task_team, parent_team,
- team->t.t_task_team[master_th->th.th_task_state], team));
-
- if (active_level || master_th->th.th_task_team) {
- // Take a memo of primary thread's task_state
- KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
- if (master_th->th.th_task_state_top >=
- master_th->th.th_task_state_stack_sz) { // increase size
- kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
- kmp_uint8 *old_stack, *new_stack;
- kmp_uint32 i;
- new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
- for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
- new_stack[i] = master_th->th.th_task_state_memo_stack[i];
- }
- for (i = master_th->th.th_task_state_stack_sz; i < new_size;
- ++i) { // zero-init rest of stack
- new_stack[i] = 0;
- }
- old_stack = master_th->th.th_task_state_memo_stack;
- master_th->th.th_task_state_memo_stack = new_stack;
- master_th->th.th_task_state_stack_sz = new_size;
- __kmp_free(old_stack);
- }
- // Store primary thread's task_state on stack
- master_th->th
- .th_task_state_memo_stack[master_th->th.th_task_state_top] =
- master_th->th.th_task_state;
- master_th->th.th_task_state_top++;
-#if KMP_NESTED_HOT_TEAMS
- if (master_th->th.th_hot_teams &&
- active_level < __kmp_hot_teams_max_level &&
- team == master_th->th.th_hot_teams[active_level].hot_team) {
- // Restore primary thread's nested state if nested hot team
- master_th->th.th_task_state =
- master_th->th
- .th_task_state_memo_stack[master_th->th.th_task_state_top];
- } else {
-#endif
- master_th->th.th_task_state = 0;
-#if KMP_NESTED_HOT_TEAMS
- }
-#endif
- }
-#if !KMP_NESTED_HOT_TEAMS
- KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
- (team == root->r.r_hot_team));
-#endif
- }
-
KA_TRACE(
20,
("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
@@ -2371,8 +2475,7 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmp_gtid_from_thread(master_th), team,
team->t.t_task_team[master_th->th.th_task_state],
master_th->th.th_task_team));
- KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
- team->t.t_task_team[master_th->th.th_task_state]);
+ KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
}
#endif
@@ -2396,6 +2499,9 @@ void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
+ if (fork_context == fork_context_gnu) {
+ __ompt_lw_taskteam_unlink(master_th);
+ }
__kmp_join_restore_state(master_th, parent_team);
}
#endif
@@ -2430,12 +2536,6 @@ void __kmp_join_call(ident_t *loc, int gtid
parent_team->t.t_stack_id = NULL;
}
#endif
-
- if (team->t.t_nproc > 1 &&
- __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- team->t.b->update_num_threads(team->t.t_nproc);
- __kmp_add_threads_to_team(team, team->t.t_nproc);
- }
}
KMP_MB();
@@ -2613,18 +2713,11 @@ void __kmp_join_call(ident_t *loc, int gtid
}
if (__kmp_tasking_mode != tskm_immediate_exec) {
- if (master_th->th.th_task_state_top >
- 0) { // Restore task state from memo stack
- KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
- // Remember primary thread's state if we re-use this nested hot team
- master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
- master_th->th.th_task_state;
- --master_th->th.th_task_state_top; // pop
- // Now restore state at this level
- master_th->th.th_task_state =
- master_th->th
- .th_task_state_memo_stack[master_th->th.th_task_state_top];
- }
+ // Restore primary thread's task state from team structure
+ KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
+ team->t.t_primary_task_state == 1);
+ master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
+
// Copy the task team from the parent team to the primary thread
master_th->th.th_task_team =
parent_team->t.t_task_team[master_th->th.th_task_state];
@@ -2642,7 +2735,7 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_AFFINITY_SUPPORTED
- if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
#endif
@@ -3223,6 +3316,8 @@ static kmp_internal_control_t __kmp_get_global_icvs(void) {
// next parallel region (per thread)
// (use a max ub on value if __kmp_parallel_initialize not called yet)
__kmp_cg_max_nth, // int thread_limit;
+ __kmp_task_max_nth, // int task_thread_limit; // to set the thread_limit
+ // on task. This is used in the case of target thread_limit
__kmp_dflt_max_active_levels, // int max_active_levels; //internal control
// for max_active_levels
r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
@@ -3299,6 +3394,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
root_team->t.t_serialized = 1;
// TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
root_team->t.t_sched.sched = r_sched.sched;
+ root_team->t.t_nested_nth = &__kmp_nested_nth;
KA_TRACE(
20,
("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
@@ -3336,6 +3432,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
// TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
hot_team->t.t_sched.sched = r_sched.sched;
hot_team->t.t_size_changed = 0;
+ hot_team->t.t_nested_nth = &__kmp_nested_nth;
}
#ifdef KMP_DEBUG
@@ -3934,7 +4031,7 @@ int __kmp_register_root(int initial_thread) {
__kmp_root_counter++;
#if OMPT_SUPPORT
- if (!initial_thread && ompt_enabled.enabled) {
+ if (ompt_enabled.enabled) {
kmp_info_t *root_thread = ompt_get_thread();
@@ -4202,6 +4299,7 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
else // no tasking --> always safe to reap
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
this_thr->th.th_set_proc_bind = proc_bind_default;
+
#if KMP_AFFINITY_SUPPORTED
this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
@@ -4311,17 +4409,6 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
this_thr->th.th_next_pool = NULL;
- if (!this_thr->th.th_task_state_memo_stack) {
- size_t i;
- this_thr->th.th_task_state_memo_stack =
- (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
- this_thr->th.th_task_state_top = 0;
- this_thr->th.th_task_state_stack_sz = 4;
- for (i = 0; i < this_thr->th.th_task_state_stack_sz;
- ++i) // zero init the stack
- this_thr->th.th_task_state_memo_stack[i] = 0;
- }
-
KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
@@ -4346,8 +4433,10 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
#endif
KMP_MB();
- /* first, try to get one from the thread pool */
- if (__kmp_thread_pool) {
+ /* First, try to get one from the thread pool, unless the allocating thread
+ * is the main hidden helper thread; the hidden helper team should always
+ * allocate new OS threads. */
+ if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
__kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
if (new_thr == __kmp_thread_pool_insert_pt) {
@@ -4376,8 +4465,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
TCW_4(__kmp_nth, __kmp_nth + 1);
new_thr->th.th_task_state = 0;
- new_thr->th.th_task_state_top = 0;
- new_thr->th.th_task_state_stack_sz = 4;
if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
// Make sure pool thread has transitioned to waiting on own thread struct
@@ -4412,7 +4499,7 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
}
/* no, well fork a new one */
- KMP_ASSERT(__kmp_nth == __kmp_all_nth);
+ KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);
KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
#if KMP_USE_MONITOR
@@ -4465,6 +4552,11 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
/* allocate space for it. */
new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
+ new_thr->th.th_nt_strict = false;
+ new_thr->th.th_nt_loc = NULL;
+ new_thr->th.th_nt_sev = severity_fatal;
+ new_thr->th.th_nt_msg = NULL;
+
TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
@@ -4575,6 +4667,9 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
new_thr->th.th_active_in_pool = FALSE;
TCW_4(new_thr->th.th_active, TRUE);
+ new_thr->th.th_set_nested_nth = NULL;
+ new_thr->th.th_set_nested_nth_sz = 0;
+
/* adjust the global counters */
__kmp_all_nth++;
__kmp_nth++;
@@ -4603,6 +4698,11 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
}
#endif /* KMP_ADJUST_BLOCKTIME */
+#if KMP_AFFINITY_SUPPORTED
+ // Set the affinity and topology information for new thread
+ __kmp_affinity_set_init_mask(new_gtid, /*isa_root=*/FALSE);
+#endif
+
/* actually fork it and create the new worker thread */
KF_TRACE(
10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
@@ -4695,26 +4795,20 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
-#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
-/* Sets full mask for thread and returns old mask, no changes to structures. */
-static void
-__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
- if (KMP_AFFINITY_CAPABLE()) {
- int status;
- if (old_mask != NULL) {
- status = __kmp_get_system_affinity(old_mask, TRUE);
- int error = errno;
- if (status != 0) {
- __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
- __kmp_msg_null);
- }
- }
- __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
+#if KMP_AFFINITY_SUPPORTED
+static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
+ int first, int last, int newp) {
+ th->th.th_first_place = first;
+ th->th.th_last_place = last;
+ th->th.th_new_place = newp;
+ if (newp != th->th.th_current_place) {
+ if (__kmp_display_affinity && team->t.t_display_affinity != 1)
+ team->t.t_display_affinity = 1;
+ // Copy topology information associated with the new place
+ th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
+ th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
}
}
-#endif
-
-#if KMP_AFFINITY_SUPPORTED
// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
// It calculates the worker + primary thread's partition based upon the parent
@@ -4731,6 +4825,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
int first_place = master_th->th.th_first_place;
int last_place = master_th->th.th_last_place;
int masters_place = master_th->th.th_current_place;
+ int num_masks = __kmp_affinity.num_masks;
team->t.t_first_place = first_place;
team->t.t_last_place = last_place;
@@ -4753,13 +4848,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
for (f = 1; f < n_th; f++) {
kmp_info_t *th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th != NULL);
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = masters_place;
- if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
+ __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n",
@@ -4775,7 +4864,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (first_place <= last_place) {
n_places = last_place - first_place + 1;
} else {
- n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
+ n_places = num_masks - first_place + last_place + 1;
}
if (n_th <= n_places) {
int place = masters_place;
@@ -4785,18 +4874,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
}
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = place;
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
+ __kmp_set_thread_place(team, th, first_place, last_place, place);
KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n",
@@ -4815,13 +4898,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
kmp_info_t *th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th != NULL);
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = place;
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
+ __kmp_set_thread_place(team, th, first_place, last_place, place);
s_count++;
if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -4830,7 +4907,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
// we added an extra thread to this place; move to next place
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4841,7 +4918,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
} else if (s_count == S) { // place full; don't add extra
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4868,12 +4945,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (first_place <= last_place) {
n_places = last_place - first_place + 1;
} else {
- n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
+ n_places = num_masks - first_place + last_place + 1;
}
if (n_th <= n_places) {
int place = -1;
- if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
+ if (n_places != num_masks) {
int S = n_places / n_th;
int s_count, rem, gap, gap_ct;
@@ -4888,17 +4965,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
kmp_info_t *th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th != NULL);
- th->th.th_first_place = place;
- th->th.th_new_place = place;
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
+ int fplace = place, nplace = place;
s_count = 1;
while (s_count < S) {
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4908,7 +4980,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (rem && (gap_ct == gap)) {
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4916,12 +4988,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
rem--;
gap_ct = 0;
}
- th->th.th_last_place = place;
+ __kmp_set_thread_place(team, th, fplace, place, nplace);
gap_ct++;
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4929,10 +5001,10 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
KA_TRACE(100,
("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
- "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
+ "partition = [%d,%d], num_masks: %u\n",
__kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
f, th->th.th_new_place, th->th.th_first_place,
- th->th.th_last_place, __kmp_affinity_num_masks));
+ th->th.th_last_place, num_masks));
}
} else {
/* Having uniform space of available computation places I can create
@@ -4982,13 +5054,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
KMP_DEBUG_ASSERT(last_place >= first_place);
th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th);
- th->th.th_first_place = first;
- th->th.th_new_place = place;
- th->th.th_last_place = last;
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
+ __kmp_set_thread_place(team, th, first, last, place);
KA_TRACE(100,
("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n",
@@ -5014,13 +5080,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
kmp_info_t *th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th != NULL);
- th->th.th_first_place = place;
- th->th.th_last_place = place;
- th->th.th_new_place = place;
- if (__kmp_display_affinity && place != th->th.th_current_place &&
- team->t.t_display_affinity != 1) {
- team->t.t_display_affinity = 1;
- }
+ __kmp_set_thread_place(team, th, place, place, place);
s_count++;
if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -5029,7 +5089,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
// we added an extra thread to this place; move on to next place
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -5040,7 +5100,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
} else if (s_count == S) { // place is full; don't add extra thread
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
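Each branch of __kmp_partition_places in the hunks above advances place with the same wrap-around step, now expressed against the local num_masks instead of __kmp_affinity_num_masks. A minimal standalone sketch of that step as a hypothetical next_place helper (the runtime keeps this logic inline rather than factoring it out):

// Sketch only: the wrap-around place advance repeated in
// __kmp_partition_places; next_place is a hypothetical helper name.
static int next_place(int place, int first_place, int last_place,
                      int num_masks) {
  if (place == last_place)
    return first_place; // partition wraps back to its own start
  if (place == num_masks - 1)
    return 0; // mask array wraps around to place 0
  return place + 1; // otherwise just step to the next place
}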
@@ -5210,6 +5270,15 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
// Activate team threads via th_used_in_team
__kmp_add_threads_to_team(team, new_nproc);
}
+ // When decreasing team size, threads no longer in the team should
+ // unref task team.
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ for (f = new_nproc; f < team->t.t_nproc; f++) {
+ kmp_info_t *th = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT(th);
+ th->th.th_task_team = NULL;
+ }
+ }
#if KMP_NESTED_HOT_TEAMS
if (__kmp_hot_teams_mode == 0) {
// AC: saved number of threads should correspond to team's value in this
@@ -5220,11 +5289,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
/* release the extra threads we don't need any more */
for (f = new_nproc; f < team->t.t_nproc; f++) {
KMP_DEBUG_ASSERT(team->t.t_threads[f]);
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- // When decreasing team size, threads no longer in the team should
- // unref task team.
- team->t.t_threads[f]->th.th_task_team = NULL;
- }
__kmp_free_thread(team->t.t_threads[f]);
team->t.t_threads[f] = NULL;
}
@@ -5278,12 +5342,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#endif
}
} else { // team->t.t_nproc < new_nproc
-#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
- kmp_affin_mask_t *old_mask;
- if (KMP_AFFINITY_CAPABLE()) {
- KMP_CPU_ALLOC(old_mask);
- }
-#endif
KA_TRACE(20,
("__kmp_allocate_team: increasing hot team thread count to %d\n",
@@ -5326,13 +5384,14 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
__kmp_reinitialize_team(team, new_icvs, NULL);
}
-#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
+ KMP_AFFINITY_SUPPORTED
/* Temporarily set full mask for primary thread before creation of
workers. The reason is that workers inherit the affinity from the
primary thread, so if a lot of workers are created on the single
core quickly, they don't get a chance to set their own affinity for
a long time. */
- __kmp_set_thread_affinity_mask_full_tmp(old_mask);
+ kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
#endif
/* allocate new threads for the hot team */
@@ -5362,12 +5421,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
}
-#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
- if (KMP_AFFINITY_CAPABLE()) {
- /* Restore initial primary thread's affinity mask */
- __kmp_set_system_affinity(old_mask, TRUE);
- KMP_CPU_FREE(old_mask);
- }
+#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
+ KMP_AFFINITY_SUPPORTED
+ /* Restore initial primary thread's affinity mask */
+ new_temp_affinity.restore();
#endif
#if KMP_NESTED_HOT_TEAMS
} // end of check of t_nproc vs. new_nproc vs. hot_team_nth
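The manual save/widen/restore of the primary thread's mask is gone; kmp_affinity_raii_t now widens the mask on construction, and restore() (or the destructor) puts the original back on every exit path. The guard itself is defined elsewhere in the runtime, so the following is only a sketch of the pattern, built from the same calls the removed helper used:

// Sketch only: an RAII mask guard in the spirit of kmp_affinity_raii_t.
// Not the runtime's real class; it reuses the calls from the removed helper.
class temp_affinity_guard {
  kmp_affin_mask_t *saved_ = nullptr;

public:
  explicit temp_affinity_guard(kmp_affin_mask_t *temp_mask) {
    if (KMP_AFFINITY_CAPABLE()) {
      KMP_CPU_ALLOC(saved_);
      __kmp_get_system_affinity(saved_, TRUE); // remember the current mask
      __kmp_set_system_affinity(temp_mask, TRUE); // apply the temporary mask
    }
  }
  void restore() {
    if (saved_) {
      __kmp_set_system_affinity(saved_, TRUE); // put the original mask back
      KMP_CPU_FREE(saved_);
      saved_ = nullptr;
    }
  }
  ~temp_affinity_guard() { restore(); } // restores even on early returns
};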
@@ -5388,21 +5445,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
__kmp_initialize_info(team->t.t_threads[f], team, f,
__kmp_gtid_from_tid(f, team));
- if (level) { // set th_task_state for new threads in nested hot team
- // __kmp_initialize_info() no longer zeroes th_task_state, so we should
- // only need to set the th_task_state for the new threads. th_task_state
- // for primary thread will not be accurate until after this in
- // __kmp_fork_call(), so we look to the primary thread's memo_stack to
- // get the correct value.
- for (f = old_nproc; f < team->t.t_nproc; ++f)
- team->t.t_threads[f]->th.th_task_state =
- team->t.t_threads[0]->th.th_task_state_memo_stack[level];
- } else { // set th_task_state for new threads in non-nested hot team
- // copy primary thread's state
- kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
- for (f = old_nproc; f < team->t.t_nproc; ++f)
- team->t.t_threads[f]->th.th_task_state = old_state;
- }
+ // set th_task_state for new threads in hot team, copying the last existing thread's state
+ kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
+ for (f = old_nproc; f < team->t.t_nproc; ++f)
+ team->t.t_threads[f]->th.th_task_state = old_state;
#ifdef KMP_DEBUG
for (f = 0; f < team->t.t_nproc; ++f) {
@@ -5420,7 +5466,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
} // Check changes in number of threads
- kmp_info_t *master = team->t.t_threads[0];
if (master->th.th_teams_microtask) {
for (f = 1; f < new_nproc; ++f) {
// propagate teams construct specific info to workers
@@ -5526,6 +5571,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
__ompt_team_assign_id(team, ompt_parallel_data);
#endif
+ team->t.t_nested_nth = NULL;
+
KMP_MB();
return team;
@@ -5597,6 +5644,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
KMP_MB();
+ team->t.t_nested_nth = NULL;
+
KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
team->t.t_id));
@@ -5672,9 +5721,8 @@ void __kmp_free_team(kmp_root_t *root,
}
#endif
// first check if thread is sleeping
- kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
- if (fl.is_sleeping())
- fl.resume(__kmp_gtid_from_thread(th));
+ if (th->th.th_sleep_loc)
+ __kmp_null_resume_wrapper(th);
KMP_CPU_PAUSE();
}
}
@@ -5700,6 +5748,14 @@ void __kmp_free_team(kmp_root_t *root,
}
}
+ // Before clearing parent pointer, check if nested_nth list should be freed
+ if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
+ team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
+ KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
+ KMP_INTERNAL_FREE(team->t.t_nested_nth);
+ }
+ team->t.t_nested_nth = NULL;
+
// Reset pointer to parent team only for non-hot teams.
team->t.t_parent = NULL;
team->t.t_level = 0;
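The freeing rule above is easy to misread in the middle of __kmp_free_team: a team owns its t_nested_nth only when it is a private copy, i.e. neither the global __kmp_nested_nth defaults nor the list it shares with its parent. A minimal sketch of that rule as a hypothetical helper, using the same fields and macros as the hunk:

// Sketch only: ownership rule for a team's nested-nth list.
// release_team_nested_nth is a hypothetical helper name.
static void release_team_nested_nth(kmp_team_t *team) {
  kmp_nested_nthreads_t *nn = team->t.t_nested_nth;
  if (nn && nn != &__kmp_nested_nth && // not the global defaults
      nn != team->t.t_parent->t.t_nested_nth) { // not shared with the parent
    KMP_INTERNAL_FREE(nn->nth);
    KMP_INTERNAL_FREE(nn);
  }
  team->t.t_nested_nth = NULL; // must run before t_parent is cleared
}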
@@ -5709,8 +5765,8 @@ void __kmp_free_team(kmp_root_t *root,
for (f = 1; f < team->t.t_nproc; ++f) {
KMP_DEBUG_ASSERT(team->t.t_threads[f]);
if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
- 1, 2);
+ (void)KMP_COMPARE_AND_STORE_ACQ32(
+ &(team->t.t_threads[f]->th.th_used_in_team), 1, 2);
}
__kmp_free_thread(team->t.t_threads[f]);
}
@@ -6047,7 +6103,6 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
__kmp_join_barrier(gtid);
}
}
- TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
#if OMPD_SUPPORT
if (ompd_state & OMPD_ENABLE_BP)
@@ -6216,11 +6271,6 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
thread->th.th_pri_common = NULL;
}
- if (thread->th.th_task_state_memo_stack != NULL) {
- __kmp_free(thread->th.th_task_state_memo_stack);
- thread->th.th_task_state_memo_stack = NULL;
- }
-
#if KMP_USE_BGET
if (thread->th.th_local.bget_data != NULL) {
__kmp_finalize_bget(thread);
@@ -6683,6 +6733,13 @@ static inline char *__kmp_reg_status_name() {
#endif
} // __kmp_reg_status_get
+#if defined(KMP_USE_SHM)
+bool __kmp_shm_available = false;
+bool __kmp_tmp_available = false;
+// If /dev/shm is not accessible, we will create a temporary file under /tmp.
+char *temp_reg_status_file_name = nullptr;
+#endif
+
void __kmp_register_library_startup(void) {
char *name = __kmp_reg_status_name(); // Name of the environment variable.
@@ -6708,52 +6765,108 @@ void __kmp_register_library_startup(void) {
char *value = NULL; // Actual value of the environment variable.
#if defined(KMP_USE_SHM)
- char *shm_name = __kmp_str_format("/%s", name);
- int shm_preexist = 0;
- char *data1;
- int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
- if ((fd1 == -1) && (errno == EEXIST)) {
- // file didn't open because it already exists.
- // try opening existing file
- fd1 = shm_open(shm_name, O_RDWR, 0666);
- if (fd1 == -1) { // file didn't open
- // error out here
- __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0),
- __kmp_msg_null);
- } else {
- // able to open existing file
- shm_preexist = 1;
+ char *shm_name = nullptr;
+ char *data1 = nullptr;
+ __kmp_shm_available = __kmp_detect_shm();
+ if (__kmp_shm_available) {
+ int fd1 = -1;
+ shm_name = __kmp_str_format("/%s", name);
+ int shm_preexist = 0;
+ fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
+ if ((fd1 == -1) && (errno == EEXIST)) {
+ // file didn't open because it already exists.
+ // try opening existing file
+ fd1 = shm_open(shm_name, O_RDWR, 0600);
+ if (fd1 == -1) { // file didn't open
+ KMP_WARNING(FunctionError, "Can't open SHM");
+ __kmp_shm_available = false;
+ } else { // able to open existing file
+ shm_preexist = 1;
+ }
}
- } else if (fd1 == -1) { // SHM didn't open; it was due to error other than
- // already exists.
- // error out here.
- __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno),
- __kmp_msg_null);
- }
- if (shm_preexist == 0) {
- // we created SHM now set size
- if (ftruncate(fd1, SHM_SIZE) == -1) {
- // error occured setting size;
- __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
- KMP_ERR(errno), __kmp_msg_null);
+ if (__kmp_shm_available && shm_preexist == 0) { // SHM created, set size
+ if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size
+ KMP_WARNING(FunctionError, "Can't set size of SHM");
+ __kmp_shm_available = false;
+ }
}
+ if (__kmp_shm_available) { // SHM exists, now map it
+ data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd1, 0);
+ if (data1 == MAP_FAILED) { // failed to map shared memory
+ KMP_WARNING(FunctionError, "Can't map SHM");
+ __kmp_shm_available = false;
+ }
+ }
+ if (__kmp_shm_available) { // SHM mapped
+ if (shm_preexist == 0) { // set data to SHM, set value
+ KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
+ }
+ // Read value from either what we just wrote or existing file.
+ value = __kmp_str_format("%s", data1); // read value from SHM
+ munmap(data1, SHM_SIZE);
+ }
+ if (fd1 != -1)
+ close(fd1);
+ }
+ if (!__kmp_shm_available)
+ __kmp_tmp_available = __kmp_detect_tmp();
+ if (!__kmp_shm_available && __kmp_tmp_available) {
+ // SHM was unavailable, or failed for a reason other than the file already
+ // existing. Try to create a temp file under /tmp.
+ // If /tmp isn't accessible, fall back to using environment variable.
+ // TODO: /tmp might not always be the temporary directory. For now we will
+ // not consider TMPDIR.
+ int fd1 = -1;
+ temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);
+ int tmp_preexist = 0;
+ fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
+ if ((fd1 == -1) && (errno == EEXIST)) {
+ // file didn't open because it already exists.
+ // try opening existing file
+ fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
+ if (fd1 == -1) { // file didn't open
+ KMP_WARNING(FunctionError, "Can't open TEMP");
+ __kmp_tmp_available = false;
+ } else {
+ tmp_preexist = 1;
+ }
+ }
+ if (__kmp_tmp_available && tmp_preexist == 0) {
+ // we created /tmp file now set size
+ if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size
+ KMP_WARNING(FunctionError, "Can't set size of /tmp file");
+ __kmp_tmp_available = false;
+ }
+ }
+ if (__kmp_tmp_available) {
+ data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd1, 0);
+ if (data1 == MAP_FAILED) { // failed to map /tmp
+ KMP_WARNING(FunctionError, "Can't map /tmp");
+ __kmp_tmp_available = false;
+ }
+ }
+ if (__kmp_tmp_available) {
+ if (tmp_preexist == 0) { // set data to TMP, set value
+ KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
+ }
+ // Read value from either what we just wrote or existing file.
+ value = __kmp_str_format("%s", data1); // read value from temp file
+ munmap(data1, SHM_SIZE);
+ }
+ if (fd1 != -1)
+ close(fd1);
}
- data1 =
- (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
- if (data1 == MAP_FAILED) {
- // failed to map shared memory
- __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno),
- __kmp_msg_null);
- }
- if (shm_preexist == 0) { // set data to SHM, set value
- KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
+ if (!__kmp_shm_available && !__kmp_tmp_available) {
+ // no /dev/shm and no /tmp -- fall back to environment variable
+ // Set environment variable, but do not overwrite if it exists.
+ __kmp_env_set(name, __kmp_registration_str, 0);
+ // read value to see if it got set
+ value = __kmp_env_get(name);
}
- // Read value from either what we just wrote or existing file.
- value = __kmp_str_format("%s", data1); // read value from SHM
- munmap(data1, SHM_SIZE);
- close(fd1);
#else // Windows and unix with static library
- // Set environment variable, but do not overwrite if it is exist.
+ // Set environment variable, but do not overwrite if it exists.
__kmp_env_set(name, __kmp_registration_str, 0);
// read value to see if it got set
value = __kmp_env_get(name);
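The registration logic now degrades gracefully instead of calling __kmp_fatal: it tries /dev/shm, then a file under /tmp, and finally the environment variable, recording which backend worked in __kmp_shm_available / __kmp_tmp_available. A minimal standalone sketch of the same cascade shape; the backend helpers below are placeholders, while the real code maps SHM_SIZE bytes and copies __kmp_registration_str into whichever backing store it managed to open:

#include <optional>
#include <string>

// Sketch only: the fallback cascade in __kmp_register_library_startup.
static std::optional<std::string> shm_backend(const std::string &) {
  return std::nullopt; // stand-in: pretend /dev/shm is not usable
}
static std::optional<std::string> tmp_backend(const std::string &) {
  return std::nullopt; // stand-in: pretend /tmp is not usable either
}
static std::string env_backend(const std::string &name) {
  return "registered:" + name; // stand-in for __kmp_env_set/__kmp_env_get
}

static std::string read_or_publish(const std::string &name) {
  if (auto v = shm_backend(name)) // preferred: file under /dev/shm
    return *v;
  if (auto v = tmp_backend(name)) // fallback: regular file under /tmp
    return *v;
  return env_backend(name); // last resort: environment variable only
}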
@@ -6813,8 +6926,14 @@ void __kmp_register_library_startup(void) {
case 2: { // Neighbor is dead.
#if defined(KMP_USE_SHM)
- // close shared memory.
- shm_unlink(shm_name); // this removes file in /dev/shm
+ if (__kmp_shm_available) { // close shared memory.
+ shm_unlink(shm_name); // this removes file in /dev/shm
+ } else if (__kmp_tmp_available) {
+ unlink(temp_reg_status_file_name); // this removes the temp file
+ } else {
+ // Clear the variable and try to register library again.
+ __kmp_env_unset(name);
+ }
#else
// Clear the variable and try to register library again.
__kmp_env_unset(name);
@@ -6827,7 +6946,8 @@ void __kmp_register_library_startup(void) {
}
KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
- KMP_INTERNAL_FREE((void *)shm_name);
+ if (shm_name)
+ KMP_INTERNAL_FREE((void *)shm_name);
#endif
} // while
KMP_INTERNAL_FREE((void *)name);
@@ -6840,18 +6960,32 @@ void __kmp_unregister_library(void) {
char *value = NULL;
#if defined(KMP_USE_SHM)
- char *shm_name = __kmp_str_format("/%s", name);
- int fd1 = shm_open(shm_name, O_RDONLY, 0666);
- if (fd1 == -1) {
- // file did not open. return.
- return;
- }
- char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
- if (data1 != MAP_FAILED) {
- value = __kmp_str_format("%s", data1); // read value from SHM
- munmap(data1, SHM_SIZE);
+ char *shm_name = nullptr;
+ int fd1;
+ if (__kmp_shm_available) {
+ shm_name = __kmp_str_format("/%s", name);
+ fd1 = shm_open(shm_name, O_RDONLY, 0600);
+ if (fd1 != -1) { // File opened successfully
+ char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
+ if (data1 != MAP_FAILED) {
+ value = __kmp_str_format("%s", data1); // read value from SHM
+ munmap(data1, SHM_SIZE);
+ }
+ close(fd1);
+ }
+ } else if (__kmp_tmp_available) { // try /tmp
+ fd1 = open(temp_reg_status_file_name, O_RDONLY);
+ if (fd1 != -1) { // File opened successfully
+ char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
+ if (data1 != MAP_FAILED) {
+ value = __kmp_str_format("%s", data1); // read value from /tmp
+ munmap(data1, SHM_SIZE);
+ }
+ close(fd1);
+ }
+ } else { // fall back to environment variable
+ value = __kmp_env_get(name);
}
- close(fd1);
#else
value = __kmp_env_get(name);
#endif
@@ -6861,14 +6995,23 @@ void __kmp_unregister_library(void) {
if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
// Ok, this is our variable. Delete it.
#if defined(KMP_USE_SHM)
- shm_unlink(shm_name); // this removes file in /dev/shm
+ if (__kmp_shm_available) {
+ shm_unlink(shm_name); // this removes file in /dev/shm
+ } else if (__kmp_tmp_available) {
+ unlink(temp_reg_status_file_name); // this removes the temp file
+ } else {
+ __kmp_env_unset(name);
+ }
#else
__kmp_env_unset(name);
#endif
}
#if defined(KMP_USE_SHM)
- KMP_INTERNAL_FREE(shm_name);
+ if (shm_name)
+ KMP_INTERNAL_FREE(shm_name);
+ if (temp_reg_status_file_name)
+ KMP_INTERNAL_FREE(temp_reg_status_file_name);
#endif
KMP_INTERNAL_FREE(__kmp_registration_str);
@@ -6967,6 +7110,11 @@ static void __kmp_do_serial_initialize(void) {
__kmp_validate_locks();
+#if ENABLE_LIBOMPTARGET
+ /* Initialize functions from libomptarget */
+ __kmp_init_omptarget();
+#endif
+
/* Initialize internal memory allocator */
__kmp_init_allocator();
@@ -7192,10 +7340,12 @@ static void __kmp_do_serial_initialize(void) {
__kmp_register_atfork();
#endif
-#if !KMP_DYNAMIC_LIB
+#if !KMP_DYNAMIC_LIB || \
+ ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
{
/* Invoke the exit handler when the program finishes, only for static
- library. For dynamic library, we already have _fini and DllMain. */
+ library and macOS* dynamic. For other dynamic libraries, we already
+ have _fini and DllMain. */
int rc = atexit(__kmp_internal_end_atexit);
if (rc != 0) {
__kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
@@ -7222,6 +7372,10 @@ static void __kmp_do_serial_initialize(void) {
__kmp_init_serial = TRUE;
+ if (__kmp_version) {
+ __kmp_print_version_1();
+ }
+
if (__kmp_settings) {
__kmp_env_print();
}
@@ -7275,7 +7429,7 @@ static void __kmp_do_middle_initialize(void) {
#if KMP_AFFINITY_SUPPORTED
// __kmp_affinity_initialize() will try to set __kmp_ncores to the
// number of cores on the machine.
- __kmp_affinity_initialize();
+ __kmp_affinity_initialize(__kmp_affinity);
#endif /* KMP_AFFINITY_SUPPORTED */
@@ -7461,6 +7615,14 @@ void __kmp_hidden_helper_initialize() {
return;
}
+#if KMP_AFFINITY_SUPPORTED
+ // Initialize hidden helper affinity settings.
+ // The above __kmp_parallel_initialize() will initialize
+ // regular affinity (and topology) if not already done.
+ if (!__kmp_hh_affinity.flags.initialized)
+ __kmp_affinity_initialize(__kmp_hh_affinity);
+#endif
+
// Set the count of hidden helper tasks to be executed to zero
KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
@@ -7583,7 +7745,7 @@ int __kmp_invoke_task_func(int gtid) {
);
#if OMPT_SUPPORT
*exit_frame_p = NULL;
- this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
+ this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
#endif
#if KMP_STATS_ENABLED
@@ -7681,7 +7843,7 @@ int __kmp_invoke_teams_master(int gtid) {
#endif
__kmp_teams_master(gtid);
#if OMPT_SUPPORT
- this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
+ this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
#endif
__kmp_run_after_invoked_task(gtid, 0, this_thr, team);
return 1;
@@ -7691,7 +7853,6 @@ int __kmp_invoke_teams_master(int gtid) {
encountered by this team. since this should be enclosed in the forkjoin
critical section it should avoid race conditions with asymmetrical nested
parallelism */
-
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
kmp_info_t *thr = __kmp_threads[gtid];
@@ -7699,6 +7860,39 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
thr->th.th_set_nproc = num_threads;
}
+void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
+ int *num_threads_list) {
+ kmp_info_t *thr = __kmp_threads[gtid];
+
+ KMP_DEBUG_ASSERT(list_length > 1);
+
+ if (num_threads_list[0] > 0)
+ thr->th.th_set_nproc = num_threads_list[0];
+ thr->th.th_set_nested_nth =
+ (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
+ for (kmp_uint32 i = 0; i < list_length; ++i)
+ thr->th.th_set_nested_nth[i] = num_threads_list[i];
+ thr->th.th_set_nested_nth_sz = list_length;
+}
+
+void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
+ const char *msg) {
+ kmp_info_t *thr = __kmp_threads[gtid];
+ thr->th.th_nt_strict = true;
+ thr->th.th_nt_loc = loc;
+ // unless warning severity was explicitly requested, default to fatal
+ if (sev == severity_warning)
+ thr->th.th_nt_sev = sev;
+ else
+ thr->th.th_nt_sev = severity_fatal;
+ // if msg is unset, use an appropriate message
+ if (msg)
+ thr->th.th_nt_msg = msg;
+ else
+ thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
+ "strict num_threads clause.";
+}
+
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
int num_threads) {
KMP_DEBUG_ASSERT(thr);
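Two new entry points appear in the hunk above: __kmp_push_num_threads_list records a num_threads clause that carries a list (entry 0 sets the immediate team size, and the full list is copied into th_set_nested_nth for deeper nesting levels), while __kmp_set_strict_num_threads arms the strict-modifier diagnostics. A minimal caller-side sketch, assuming the runtime's internal kmp.h declares these entry points; in real programs the compiler emits such calls, and the list here is hypothetical:

#include "kmp.h" // internal runtime header for the declarations used below

// Sketch only: requesting a num_threads list with the strict modifier ahead
// of a parallel region. loc and gtid come from the caller.
static void request_team_sizes(ident_t *loc, int gtid) {
  int sizes[] = {4, 2, 2}; // level 0 -> 4 threads, levels 1 and 2 -> 2 each
  __kmp_push_num_threads_list(loc, gtid, /*list_length=*/3, sizes);
  __kmp_set_strict_num_threads(loc, gtid, severity_fatal,
                               /*msg=*/NULL); // NULL selects the default text
}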
@@ -7932,8 +8126,10 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
__kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
+ ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
if (ompt_enabled.enabled &&
- this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
+ (ompt_state == ompt_state_wait_barrier_teams ||
+ ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
@@ -7944,15 +8140,16 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
+ ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
+ if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
+ sync_kind = ompt_sync_region_barrier_teams;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
- ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
- codeptr);
+ sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
- ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
- codeptr);
+ sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
@@ -8155,6 +8352,7 @@ void __kmp_cleanup(void) {
__kmp_nested_nth.nth = NULL;
__kmp_nested_nth.size = 0;
__kmp_nested_nth.used = 0;
+
KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
__kmp_nested_proc_bind.bind_types = NULL;
__kmp_nested_proc_bind.size = 0;
@@ -8652,9 +8850,8 @@ void __kmp_aux_display_affinity(int gtid, const char *format) {
}
/* ------------------------------------------------------------------------ */
-
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
- int blocktime = arg; /* argument is in milliseconds */
+ int blocktime = arg; /* argument is in microseconds */
#if KMP_USE_MONITOR
int bt_intervals;
#endif
@@ -8730,7 +8927,6 @@ __kmp_determine_reduction_method(
int team_size;
- KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
@@ -8751,10 +8947,12 @@ __kmp_determine_reduction_method(
int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
+ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
+ KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
- KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
+ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \
+ KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
int teamsize_cutoff = 4;
@@ -8778,11 +8976,15 @@ __kmp_determine_reduction_method(
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
- // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
+ // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD ||
+ // KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
-#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
+#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
+ KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
+ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \
+ KMP_OS_WASI || KMP_OS_AIX
// basic tuning
@@ -8930,7 +9132,8 @@ int __kmp_pause_resource(kmp_pause_status_t level) {
__kmp_soft_pause();
return 0;
}
- } else if (level == kmp_hard_paused) { // requesting hard pause
+ } else if (level == kmp_hard_paused || level == kmp_stop_tool_paused) {
+ // requesting hard pause or stop_tool pause
if (__kmp_pause_status != kmp_not_paused) {
// error message about already being paused
return 1;
@@ -9018,8 +9221,8 @@ void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
// to wake it up.
for (int f = 1; f < new_nthreads; ++f) {
KMP_DEBUG_ASSERT(team->t.t_threads[f]);
- KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
- 3);
+ (void)KMP_COMPARE_AND_STORE_ACQ32(
+ &(team->t.t_threads[f]->th.th_used_in_team), 0, 3);
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
__kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
(kmp_flag_32<false, false> *)NULL);
@@ -9181,3 +9384,20 @@ void __kmp_set_nesting_mode_threads() {
if (__kmp_nesting_mode == 1) // turn on nesting for this case only
set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
+
+// Empty symbols to export (see exports_so.txt) when feature is disabled
+extern "C" {
+#if !KMP_STATS_ENABLED
+void __kmp_reset_stats() {}
+#endif
+#if !USE_DEBUGGER
+int __kmp_omp_debug_struct_info = FALSE;
+int __kmp_debugging = FALSE;
+#endif
+#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
+void __kmp_itt_fini_ittlib() {}
+void __kmp_itt_init_ittlib() {}
+#endif
+}
+
+// end of file