author     thegeorg <thegeorg@yandex-team.com>  2022-10-20 12:16:22 +0300
committer  thegeorg <thegeorg@yandex-team.com>  2022-10-20 12:16:22 +0300
commit     da5ee816c1598acf602c1c42845b544878400d34 (patch)
tree       47c0acdeae9bbd5ceb1019b6c8e94ada327d7776
parent     d37715ef865ba1c48ca505f8b96151ae6d417657 (diff)
download   ydb-da5ee816c1598acf602c1c42845b544878400d34.tar.gz
Update contrib/libs/cxxsupp/openmp to 15.0.2
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp.h                   |  78
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_affinity.cpp        | 160
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_alloc.cpp           |   7
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_atomic.cpp          |   4
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_atomic.h            |   6
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_barrier.cpp         |   9
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_csupport.cpp        |  80
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp        | 228
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h         |  79
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_ftn_os.h            |   4
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_global.cpp          |  12
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_lock.cpp            |   2
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_os.h                |  50
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_runtime.cpp         |  63
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_sched.cpp           |  74
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_settings.cpp        |  31
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp        |   3
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_taskdeps.h          |   9
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_tasking.cpp         | 326
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_version.cpp         |  10
-rw-r--r--  contrib/libs/cxxsupp/openmp/kmp_wait_release.h      |   9
-rw-r--r--  contrib/libs/cxxsupp/openmp/omp-tools.h             |  20
-rw-r--r--  contrib/libs/cxxsupp/openmp/omp.h                   |  10
-rw-r--r--  contrib/libs/cxxsupp/openmp/ompt-event-specific.h   |   2
-rw-r--r--  contrib/libs/cxxsupp/openmp/ompt-general.cpp        |   7
-rw-r--r--  contrib/libs/cxxsupp/openmp/ompt-internal.h         |   1
-rw-r--r--  contrib/libs/cxxsupp/openmp/ompt-specific.h         |  11
-rw-r--r--  contrib/libs/cxxsupp/openmp/z_Linux_util.cpp        |   8
-rw-r--r--  library/cpp/json/converter/converter.h              |   2
-rw-r--r--  library/cpp/json/converter/ut/test_conversion.cpp   |  20
30 files changed, 1121 insertions(+), 204 deletions(-)
diff --git a/contrib/libs/cxxsupp/openmp/kmp.h b/contrib/libs/cxxsupp/openmp/kmp.h
index 9502167474..4b9602626a 100644
--- a/contrib/libs/cxxsupp/openmp/kmp.h
+++ b/contrib/libs/cxxsupp/openmp/kmp.h
@@ -100,18 +100,18 @@ class kmp_stats_list;
#ifndef HWLOC_OBJ_PACKAGE
#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
#endif
-#if HWLOC_API_VERSION >= 0x00020000
-// hwloc 2.0 changed type of depth of object from unsigned to int
-typedef int kmp_hwloc_depth_t;
-#else
-typedef unsigned int kmp_hwloc_depth_t;
-#endif
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#include <xmmintrin.h>
#endif
+// The below has to be defined before including "kmp_barrier.h".
+#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
+#define KMP_INTERNAL_FREE(p) free(p)
+#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
+#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
+
#include "kmp_debug.h"
#include "kmp_lock.h"
#include "kmp_version.h"
@@ -841,7 +841,9 @@ extern unsigned __kmp_affinity_num_masks;
extern void __kmp_affinity_bind_thread(int which);
extern kmp_affin_mask_t *__kmp_affin_fullMask;
+extern kmp_affin_mask_t *__kmp_affin_origMask;
extern char *__kmp_cpuinfo_file;
+extern bool __kmp_affin_reset;
#endif /* KMP_AFFINITY_SUPPORTED */
@@ -967,7 +969,6 @@ extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
extern omp_memspace_handle_t const omp_high_bw_mem_space;
extern omp_memspace_handle_t const omp_low_lat_mem_space;
-// Preview of target memory support
extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
@@ -987,7 +988,6 @@ extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern omp_allocator_handle_t const omp_pteam_mem_alloc;
extern omp_allocator_handle_t const omp_thread_mem_alloc;
-// Preview of target memory support
extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
@@ -1124,7 +1124,7 @@ extern void __kmp_init_target_mem();
#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
extern kmp_uint64 __kmp_ticks_per_msec;
-#if KMP_COMPILER_ICC
+#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#define KMP_NOW() ((kmp_uint64)_rdtsc())
#else
#define KMP_NOW() __kmp_hardware_timestamp()
@@ -1334,7 +1334,10 @@ static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
char flag;
__asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
"setb %0"
- : "=r"(flag)
+ // The "=q" restraint means any register accessible as rl
+ // in 32-bit mode: a, b, c, and d;
+ // in 64-bit mode: any integer register
+ : "=q"(flag)
: "a"(timeLo), "d"(timeHi), "c"(hint)
:);
return flag;
@@ -1361,7 +1364,10 @@ static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
char flag;
__asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
"setb %0"
- : "=r"(flag)
+ // The "=q" restraint means any register accessible as rl
+ // in 32-bit mode: a, b, c, and d;
+ // in 64-bit mode: any integer register
+ : "=q"(flag)
: "a"(timeLo), "d"(timeHi), "c"(hint)
:);
return flag;
@@ -2548,11 +2554,22 @@ typedef union KMP_ALIGN_CACHE kmp_thread_data {
char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
} kmp_thread_data_t;
+typedef struct kmp_task_pri {
+ kmp_thread_data_t td;
+ kmp_int32 priority;
+ kmp_task_pri *next;
+} kmp_task_pri_t;
+
// Data for task teams which are used when tasking is enabled for the team
typedef struct kmp_base_task_team {
kmp_bootstrap_lock_t
tt_threads_lock; /* Lock used to allocate per-thread part of task team */
/* must be bootstrap lock since used at library shutdown*/
+
+ // TODO: check performance vs kmp_tas_lock_t
+ kmp_bootstrap_lock_t tt_task_pri_lock; /* Lock to access priority tasks */
+ kmp_task_pri_t *tt_task_pri_list;
+
kmp_task_team_t *tt_next; /* For linking the task team free list */
kmp_thread_data_t
*tt_threads_data; /* Array of per-thread structures for task team */
@@ -2564,6 +2581,7 @@ typedef struct kmp_base_task_team {
kmp_int32 tt_max_threads; // # entries allocated for threads_data array
kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
kmp_int32 tt_untied_task_encountered;
+ std::atomic<kmp_int32> tt_num_task_pri; // number of priority tasks enqueued
// There is hidden helper thread encountered in this task team so that we must
// wait when waiting on task team
kmp_int32 tt_hidden_helper_task_encountered;
@@ -2973,6 +2991,15 @@ struct fortran_inx_info {
kmp_int32 data;
};
+// This list type exists to hold old __kmp_threads arrays so that
+// old references to them may complete while reallocation takes place when
+// expanding the array. The items in this list are kept alive until library
+// shutdown.
+typedef struct kmp_old_threads_list_t {
+ kmp_info_t **threads;
+ struct kmp_old_threads_list_t *next;
+} kmp_old_threads_list_t;
+
/* ------------------------------------------------------------------------ */
extern int __kmp_settings;
@@ -3036,6 +3063,8 @@ extern int __kmp_storage_map_verbose_specified;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
extern kmp_cpuinfo_t __kmp_cpuinfo;
static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
+#elif KMP_OS_DARWIN && KMP_ARCH_AARCH64
+static inline bool __kmp_is_hybrid_cpu() { return true; }
#else
static inline bool __kmp_is_hybrid_cpu() { return false; }
#endif
@@ -3043,6 +3072,7 @@ static inline bool __kmp_is_hybrid_cpu() { return false; }
extern volatile int __kmp_init_serial;
extern volatile int __kmp_init_gtid;
extern volatile int __kmp_init_common;
+extern volatile int __kmp_need_register_serial;
extern volatile int __kmp_init_middle;
extern volatile int __kmp_init_parallel;
#if KMP_USE_MONITOR
@@ -3150,6 +3180,7 @@ extern int __kmp_tp_cached; /* whether threadprivate cache has been created
(__kmpc_threadprivate_cached()) */
extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
blocking (env setting) */
+extern bool __kmp_wpolicy_passive; /* explicitly set passive wait policy */
#if KMP_USE_MONITOR
extern int
__kmp_monitor_wakeups; /* number of times monitor wakes up per second */
@@ -3253,6 +3284,8 @@ extern int __kmp_teams_thread_limit;
/* the following are protected by the fork/join lock */
/* write: lock read: anytime */
extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
+/* Holds old arrays of __kmp_threads until library shutdown */
+extern kmp_old_threads_list_t *__kmp_old_threads_list;
/* read/write: lock */
extern volatile kmp_team_t *__kmp_team_pool;
extern volatile kmp_info_t *__kmp_thread_pool;
@@ -3451,11 +3484,6 @@ extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
#define __kmp_thread_free(th, ptr) \
___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
-#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
-#define KMP_INTERNAL_FREE(p) free(p)
-#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
-#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
-
extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
@@ -3601,8 +3629,18 @@ static inline void __kmp_assign_root_init_mask() {
r->r.r_affinity_assigned = TRUE;
}
}
+static inline void __kmp_reset_root_init_mask(int gtid) {
+ kmp_info_t *th = __kmp_threads[gtid];
+ kmp_root_t *r = th->th.th_root;
+ if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) {
+ __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
+ KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask);
+ r->r.r_affinity_assigned = FALSE;
+ }
+}
#else /* KMP_AFFINITY_SUPPORTED */
#define __kmp_assign_root_init_mask() /* Nothing */
+static inline void __kmp_reset_root_init_mask(int gtid) {}
#endif /* KMP_AFFINITY_SUPPORTED */
// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
// format string is for affinity, so platforms that do not support
@@ -3865,6 +3903,11 @@ KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 numberOfSections);
+KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
+
KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
kmp_int32 schedtype, kmp_int32 *plastiter,
kmp_int *plower, kmp_int *pupper,
@@ -3878,6 +3921,9 @@ KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
void (*cpy_func)(void *, void *),
kmp_int32 didit);
+KMP_EXPORT void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
+ void *cpy_data);
+
extern void KMPC_SET_NUM_THREADS(int arg);
extern void KMPC_SET_DYNAMIC(int flag);
extern void KMPC_SET_NESTED(int flag);
diff --git a/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp b/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp
index 414a27fb05..b9a8d49d8d 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp
@@ -138,6 +138,18 @@ const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
return "unknown";
}
+#if KMP_AFFINITY_SUPPORTED
+// If affinity is supported, check the affinity
+// verbose and warning flags before printing warning
+#define KMP_AFF_WARNING(...) \
+ if (__kmp_affinity_verbose || \
+ (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { \
+ KMP_WARNING(__VA_ARGS__); \
+ }
+#else
+#define KMP_AFF_WARNING KMP_WARNING
+#endif
+
////////////////////////////////////////////////////////////////////////////////
// kmp_hw_thread_t methods
int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
@@ -818,16 +830,16 @@ void kmp_topology_t::canonicalize() {
// First try core, then thread, then package
kmp_hw_t gran_types[3] = {KMP_HW_CORE, KMP_HW_THREAD, KMP_HW_SOCKET};
for (auto g : gran_types) {
- if (__kmp_topology->get_equivalent_type(g) != KMP_HW_UNKNOWN) {
+ if (get_equivalent_type(g) != KMP_HW_UNKNOWN) {
gran_type = g;
break;
}
}
KMP_ASSERT(gran_type != KMP_HW_UNKNOWN);
// Warn user what granularity setting will be used instead
- KMP_WARNING(AffGranularityBad, "KMP_AFFINITY",
- __kmp_hw_get_catalog_string(__kmp_affinity_gran),
- __kmp_hw_get_catalog_string(gran_type));
+ KMP_AFF_WARNING(AffGranularityBad, "KMP_AFFINITY",
+ __kmp_hw_get_catalog_string(__kmp_affinity_gran),
+ __kmp_hw_get_catalog_string(gran_type));
__kmp_affinity_gran = gran_type;
}
#if KMP_GROUP_AFFINITY
@@ -839,12 +851,12 @@ void kmp_topology_t::canonicalize() {
// processor groups that cover a socket, then the runtime must
// restrict the granularity down to the processor group level.
if (__kmp_num_proc_groups > 1) {
- int gran_depth = __kmp_topology->get_level(gran_type);
- int proc_group_depth = __kmp_topology->get_level(KMP_HW_PROC_GROUP);
+ int gran_depth = get_level(gran_type);
+ int proc_group_depth = get_level(KMP_HW_PROC_GROUP);
if (gran_depth >= 0 && proc_group_depth >= 0 &&
gran_depth < proc_group_depth) {
- KMP_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY",
- __kmp_hw_get_catalog_string(__kmp_affinity_gran));
+ KMP_AFF_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY",
+ __kmp_hw_get_catalog_string(__kmp_affinity_gran));
__kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP;
}
}
@@ -966,16 +978,16 @@ bool kmp_topology_t::filter_hw_subset() {
if (equivalent_type != KMP_HW_UNKNOWN) {
__kmp_hw_subset->at(i).type = equivalent_type;
} else {
- KMP_WARNING(AffHWSubsetNotExistGeneric,
- __kmp_hw_get_catalog_string(type));
+ KMP_AFF_WARNING(AffHWSubsetNotExistGeneric,
+ __kmp_hw_get_catalog_string(type));
return false;
}
// Check to see if current layer has already been
// specified either directly or through an equivalent type
if (specified[equivalent_type] != KMP_HW_UNKNOWN) {
- KMP_WARNING(AffHWSubsetEqvLayers, __kmp_hw_get_catalog_string(type),
- __kmp_hw_get_catalog_string(specified[equivalent_type]));
+ KMP_AFF_WARNING(AffHWSubsetEqvLayers, __kmp_hw_get_catalog_string(type),
+ __kmp_hw_get_catalog_string(specified[equivalent_type]));
return false;
}
specified[equivalent_type] = type;
@@ -985,8 +997,8 @@ bool kmp_topology_t::filter_hw_subset() {
if (max_count < 0 ||
(num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
bool plural = (num > 1);
- KMP_WARNING(AffHWSubsetManyGeneric,
- __kmp_hw_get_catalog_string(type, plural));
+ KMP_AFF_WARNING(AffHWSubsetManyGeneric,
+ __kmp_hw_get_catalog_string(type, plural));
return false;
}
@@ -1008,21 +1020,21 @@ bool kmp_topology_t::filter_hw_subset() {
if ((using_core_effs || using_core_types) && !__kmp_is_hybrid_cpu()) {
if (item.num_attrs == 1) {
if (using_core_effs) {
- KMP_WARNING(AffHWSubsetIgnoringAttr, "efficiency");
+ KMP_AFF_WARNING(AffHWSubsetIgnoringAttr, "efficiency");
} else {
- KMP_WARNING(AffHWSubsetIgnoringAttr, "core_type");
+ KMP_AFF_WARNING(AffHWSubsetIgnoringAttr, "core_type");
}
using_core_effs = false;
using_core_types = false;
} else {
- KMP_WARNING(AffHWSubsetAttrsNonHybrid);
+ KMP_AFF_WARNING(AffHWSubsetAttrsNonHybrid);
return false;
}
}
// Check if using both core types and core efficiencies together
if (using_core_types && using_core_effs) {
- KMP_WARNING(AffHWSubsetIncompat, "core_type", "efficiency");
+ KMP_AFF_WARNING(AffHWSubsetIncompat, "core_type", "efficiency");
return false;
}
@@ -1058,7 +1070,7 @@ bool kmp_topology_t::filter_hw_subset() {
(num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
kmp_str_buf_t buf;
__kmp_hw_get_catalog_core_string(item.attr[j], &buf, num > 0);
- KMP_WARNING(AffHWSubsetManyGeneric, buf.str);
+ KMP_AFF_WARNING(AffHWSubsetManyGeneric, buf.str);
__kmp_str_buf_free(&buf);
return false;
}
@@ -1080,8 +1092,8 @@ bool kmp_topology_t::filter_hw_subset() {
}
kmp_str_buf_t buf;
__kmp_hw_get_catalog_core_string(other_attr, &buf, item.num[j] > 0);
- KMP_WARNING(AffHWSubsetIncompat,
- __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str);
+ KMP_AFF_WARNING(AffHWSubsetIncompat,
+ __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str);
__kmp_str_buf_free(&buf);
return false;
}
@@ -1093,7 +1105,7 @@ bool kmp_topology_t::filter_hw_subset() {
kmp_str_buf_t buf;
__kmp_hw_get_catalog_core_string(item.attr[j], &buf,
item.num[j] > 0);
- KMP_WARNING(AffHWSubsetAttrRepeat, buf.str);
+ KMP_AFF_WARNING(AffHWSubsetAttrRepeat, buf.str);
__kmp_str_buf_free(&buf);
return false;
}
@@ -1201,7 +1213,7 @@ bool kmp_topology_t::filter_hw_subset() {
// One last check that we shouldn't allow filtering entire machine
if (num_filtered == num_hw_threads) {
- KMP_WARNING(AffHWSubsetAllFiltered);
+ KMP_AFF_WARNING(AffHWSubsetAllFiltered);
__kmp_free(filtered);
return false;
}
@@ -1536,6 +1548,8 @@ int __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
// internal topology object and set the layer ids for it. Each routine
// returns a boolean on whether it was successful at doing so.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
+// Original mask is a subset of full mask in multiple processor groups topology
+kmp_affin_mask_t *__kmp_affin_origMask = NULL;
#if KMP_USE_HWLOC
static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
@@ -1765,7 +1779,7 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
hw_thread_index = 0;
pu = NULL;
- while (pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu)) {
+ while ((pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu))) {
int index = depth - 1;
bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
@@ -3353,10 +3367,7 @@ static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
}
if (__kmp_affinity_gran_levels >= (int)depth) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffThreadsMayMigrate);
- }
+ KMP_AFF_WARNING(AffThreadsMayMigrate);
}
// Run through the table, forming the masks for all threads on each core.
@@ -3443,11 +3454,7 @@ static int nextNewMask;
{ \
if (((_osId) > _maxOsId) || \
(!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
- if (__kmp_affinity_verbose || \
- (__kmp_affinity_warnings && \
- (__kmp_affinity_type != affinity_none))) { \
- KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
- } \
+ KMP_AFF_WARNING(AffIgnoreInvalidProcID, _osId); \
} else { \
ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
} \
@@ -3498,11 +3505,7 @@ static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
// Copy the mask for that osId to the sum (union) mask.
if ((num > maxOsId) ||
(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
+ KMP_AFF_WARNING(AffIgnoreInvalidProcID, num);
KMP_CPU_ZERO(sumMask);
} else {
KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
@@ -3534,11 +3537,7 @@ static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
// Add the mask for that osId to the sum mask.
if ((num > maxOsId) ||
(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
+ KMP_AFF_WARNING(AffIgnoreInvalidProcID, num);
} else {
KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
setSize++;
@@ -3695,11 +3694,7 @@ static void __kmp_process_subplace_list(const char **scan,
if (**scan == '}' || **scan == ',') {
if ((start > maxOsId) ||
(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
+ KMP_AFF_WARNING(AffIgnoreInvalidProcID, start);
} else {
KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
(*setSize)++;
@@ -3728,11 +3723,7 @@ static void __kmp_process_subplace_list(const char **scan,
for (i = 0; i < count; i++) {
if ((start > maxOsId) ||
(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
+ KMP_AFF_WARNING(AffIgnoreInvalidProcID, start);
break; // don't proliferate warnings for large count
} else {
KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
@@ -3779,11 +3770,7 @@ static void __kmp_process_subplace_list(const char **scan,
for (i = 0; i < count; i++) {
if ((start > maxOsId) ||
(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
+ KMP_AFF_WARNING(AffIgnoreInvalidProcID, start);
break; // don't proliferate warnings for large count
} else {
KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
@@ -3825,10 +3812,7 @@ static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
KMP_ASSERT(num >= 0);
if ((num > maxOsId) ||
(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
+ KMP_AFF_WARNING(AffIgnoreInvalidProcID, num);
} else {
KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
(*setSize)++;
@@ -3945,11 +3929,8 @@ void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
(!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
(!KMP_CPU_ISSET(j + stride,
KMP_CPU_INDEX(osId2Mask, j + stride)))) {
- if ((__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_none))) &&
- i < count - 1) {
- KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
+ if (i < count - 1) {
+ KMP_AFF_WARNING(AffIgnoreInvalidProcID, j + stride);
}
continue;
}
@@ -4072,8 +4053,13 @@ static void __kmp_aux_affinity_initialize(void) {
if (__kmp_affin_fullMask == NULL) {
KMP_CPU_ALLOC(__kmp_affin_fullMask);
}
+ if (__kmp_affin_origMask == NULL) {
+ KMP_CPU_ALLOC(__kmp_affin_origMask);
+ }
if (KMP_AFFINITY_CAPABLE()) {
__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
+ // Make a copy before possible expanding to the entire machine mask
+ __kmp_affin_origMask->copy(__kmp_affin_fullMask);
if (__kmp_affinity_respect_mask) {
// Count the number of available processors.
unsigned i;
@@ -4085,11 +4071,7 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_avail_proc++;
}
if (__kmp_avail_proc > __kmp_xproc) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(ErrorInitializeAffinity);
- }
+ KMP_AFF_WARNING(ErrorInitializeAffinity);
__kmp_affinity_type = affinity_none;
KMP_AFFINITY_DISABLE();
return;
@@ -4111,6 +4093,10 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_avail_proc =
__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
#if KMP_OS_WINDOWS
+ if (__kmp_num_proc_groups <= 1) {
+ // Copy expanded full mask if topology has single processor group
+ __kmp_affin_origMask->copy(__kmp_affin_fullMask);
+ }
// Set the process affinity mask since threads' affinity
// masks must be subset of process mask in Windows* OS
__kmp_affin_fullMask->set_process_affinity(true);
@@ -4254,10 +4240,8 @@ static void __kmp_aux_affinity_initialize(void) {
// Early exit if topology could not be created
if (!__kmp_topology) {
- if (KMP_AFFINITY_CAPABLE() &&
- (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
- KMP_WARNING(ErrorInitializeAffinity);
+ if (KMP_AFFINITY_CAPABLE()) {
+ KMP_AFF_WARNING(ErrorInitializeAffinity);
}
if (nPackages > 0 && nCoresPerPkg > 0 && __kmp_nThreadsPerCore > 0 &&
__kmp_ncores > 0) {
@@ -4283,6 +4267,13 @@ static void __kmp_aux_affinity_initialize(void) {
if (__kmp_affinity_verbose)
__kmp_topology->print("KMP_AFFINITY");
bool filtered = __kmp_topology->filter_hw_subset();
+ if (filtered) {
+#if KMP_OS_WINDOWS
+ // Copy filtered full mask if topology has single processor group
+ if (__kmp_num_proc_groups <= 1)
+#endif
+ __kmp_affin_origMask->copy(__kmp_affin_fullMask);
+ }
if (filtered && __kmp_affinity_verbose)
__kmp_topology->print("KMP_HW_SUBSET");
machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
@@ -4321,10 +4312,7 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_affinity_proclist, osId2Mask, maxIndex);
}
if (__kmp_affinity_num_masks == 0) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffNoValidProcID);
- }
+ KMP_AFF_WARNING(AffNoValidProcID);
__kmp_affinity_type = affinity_none;
__kmp_create_affinity_none_places();
return;
@@ -4374,9 +4362,7 @@ static void __kmp_aux_affinity_initialize(void) {
case affinity_balanced:
if (depth <= 1) {
- if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
- KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
- }
+ KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
__kmp_affinity_type = affinity_none;
__kmp_create_affinity_none_places();
return;
@@ -4393,9 +4379,7 @@ static void __kmp_aux_affinity_initialize(void) {
int nproc = ncores * maxprocpercore;
if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
- if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
- KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
- }
+ KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
__kmp_affinity_type = affinity_none;
return;
}
@@ -4506,6 +4490,10 @@ void __kmp_affinity_uninitialize(void) {
KMP_CPU_FREE(__kmp_affin_fullMask);
__kmp_affin_fullMask = NULL;
}
+ if (__kmp_affin_origMask != NULL) {
+ KMP_CPU_FREE(__kmp_affin_origMask);
+ __kmp_affin_origMask = NULL;
+ }
__kmp_affinity_num_masks = 0;
__kmp_affinity_type = affinity_default;
__kmp_affinity_num_places = 0;
diff --git a/contrib/libs/cxxsupp/openmp/kmp_alloc.cpp b/contrib/libs/cxxsupp/openmp/kmp_alloc.cpp
index 120cad17c2..e9aaedc538 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_alloc.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_alloc.cpp
@@ -1254,7 +1254,6 @@ static void **mk_hbw_preferred_hugetlb;
static void **mk_dax_kmem;
static void **mk_dax_kmem_all;
static void **mk_dax_kmem_preferred;
-// Preview of target memory support
static void *(*kmp_target_alloc_host)(size_t size, int device);
static void *(*kmp_target_alloc_shared)(size_t size, int device);
static void *(*kmp_target_alloc_device)(size_t size, int device);
@@ -1269,7 +1268,7 @@ static bool __kmp_target_mem_available;
MA == llvm_omp_target_shared_mem_alloc || \
MA == llvm_omp_target_device_mem_alloc)
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
static inline void chk_kind(void ***pkind) {
KMP_DEBUG_ASSERT(pkind);
if (*pkind) // symbol found
@@ -1280,7 +1279,7 @@ static inline void chk_kind(void ***pkind) {
void __kmp_init_memkind() {
// as of 2018-07-31 memkind does not support Windows*, exclude it for now
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
// use of statically linked memkind is problematic, as it depends on libnuma
kmp_mk_lib_name = "libmemkind.so";
h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
@@ -1364,7 +1363,7 @@ void __kmp_fini_memkind() {
mk_dax_kmem_preferred = NULL;
#endif
}
-// Preview of target memory support
+
void __kmp_init_target_mem() {
*(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
*(void **)(&kmp_target_alloc_shared) =
diff --git a/contrib/libs/cxxsupp/openmp/kmp_atomic.cpp b/contrib/libs/cxxsupp/openmp/kmp_atomic.cpp
index 0bd7b1a41a..21c2c60bfb 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_atomic.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_atomic.cpp
@@ -2452,6 +2452,7 @@ ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
RTYPE, LCK_ID, MASK, GOMP_FLAG) \
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
TYPE new_value; \
+ (void)new_value; \
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
OP_CMPXCHG_CPT(TYPE, BITS, OP) \
}
@@ -2461,6 +2462,7 @@ ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
LCK_ID, GOMP_FLAG) \
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
TYPE new_value; \
+ (void)new_value; \
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
}
@@ -3162,6 +3164,7 @@ ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
RTYPE, LCK_ID, MASK, GOMP_FLAG) \
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
TYPE new_value; \
+ (void)new_value; \
OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
}
@@ -3171,6 +3174,7 @@ ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
LCK_ID, GOMP_FLAG) \
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
TYPE new_value; \
+ (void)new_value; \
OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
}
diff --git a/contrib/libs/cxxsupp/openmp/kmp_atomic.h b/contrib/libs/cxxsupp/openmp/kmp_atomic.h
index 079b917285..19c02e9d25 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_atomic.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_atomic.h
@@ -251,6 +251,9 @@ struct KMP_DO_ALIGN(4) kmp_cmplx128_a4_t {
kmp_cmplx128_a4_t() : q() {}
+#if defined(__cplusplus) && (KMP_OS_WINDOWS)
+ kmp_cmplx128_a4_t(const std::complex<_Quad> &c128) : q(c128) {}
+#endif
kmp_cmplx128_a4_t(const kmp_cmplx128 &c128) : q(c128) {}
kmp_cmplx128_a4_t operator+(const kmp_cmplx128_a4_t &b) {
@@ -314,6 +317,9 @@ struct KMP_DO_ALIGN(16) kmp_cmplx128_a16_t {
kmp_cmplx128_a16_t() : q() {}
+#if defined(__cplusplus) && (KMP_OS_WINDOWS)
+ kmp_cmplx128_a16_t(const std::complex<_Quad> &c128) : q(c128) {}
+#endif
kmp_cmplx128_a16_t(const kmp_cmplx128 &c128) : q(c128) {}
kmp_cmplx128_a16_t operator+(const kmp_cmplx128_a16_t &b) {
diff --git a/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp b/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp
index ee05bb3587..1a718b45ff 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp
@@ -2163,7 +2163,6 @@ void __kmp_join_barrier(int gtid) {
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team;
- kmp_uint nproc;
int tid;
#ifdef KMP_DEBUG
int team_id;
@@ -2176,12 +2175,14 @@ void __kmp_join_barrier(int gtid) {
itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
#endif
#endif /* USE_ITT_BUILD */
+#if ((USE_ITT_BUILD && USE_ITT_NOTIFY) || defined KMP_DEBUG)
+ int nproc = this_thr->th.th_team_nproc;
+#endif
KMP_MB();
// Get current info
team = this_thr->th.th_team;
- nproc = this_thr->th.th_team_nproc;
- KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
+ KMP_DEBUG_ASSERT(nproc == team->t.t_nproc);
tid = __kmp_tid_from_gtid(gtid);
#ifdef KMP_DEBUG
team_id = team->t.t_id;
@@ -2354,7 +2355,7 @@ void __kmp_join_barrier(int gtid) {
// Set arrive time to zero to be able to check it in
// __kmp_invoke_task(); the same is done inside the loop below
this_thr->th.th_bar_arrive_time = 0;
- for (kmp_uint i = 1; i < nproc; ++i) {
+ for (int i = 1; i < nproc; ++i) {
delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
other_threads[i]->th.th_bar_arrive_time = 0;
}
diff --git a/contrib/libs/cxxsupp/openmp/kmp_csupport.cpp b/contrib/libs/cxxsupp/openmp/kmp_csupport.cpp
index e263558517..c932d450c8 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_csupport.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_csupport.cpp
@@ -354,9 +354,9 @@ void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
-@param num_teams_lo lower bound on number of teams requested for the teams
+@param num_teams_lb lower bound on number of teams requested for the teams
construct
-@param num_teams_up upper bound on number of teams requested for the teams
+@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct
@@ -632,6 +632,11 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
"team %p\n",
global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
}
+#if KMP_AFFINITY_SUPPORTED
+ if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(global_tid);
+ }
+#endif
} else {
if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
@@ -685,13 +690,13 @@ void __kmpc_flush(ident_t *loc) {
if (!__kmp_cpuinfo.flags.sse2) {
// CPU cannot execute SSE2 instructions.
} else {
-#if KMP_COMPILER_ICC
+#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
_mm_mfence();
#elif KMP_COMPILER_MSVC
MemoryBarrier();
#else
__sync_synchronize();
-#endif // KMP_COMPILER_ICC
+#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
}
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
@@ -2021,6 +2026,11 @@ void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
+#if KMP_AFFINITY_SUPPORTED
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
__kmp_aux_display_affinity(gtid, format);
}
@@ -2034,6 +2044,11 @@ size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
+#if KMP_AFFINITY_SUPPORTED
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
__kmp_str_buf_init(&capture_buf);
num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
if (buffer && buf_size) {
@@ -2224,6 +2239,61 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
}
}
+/* --------------------------------------------------------------------------*/
+/*!
+@ingroup THREADPRIVATE
+@param loc source location information
+@param gtid global thread number
+@param cpy_data pointer to the data to be saved/copied or 0
+@return the saved pointer to the data
+
+__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate:
+__kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so
+coming from single), and returns that pointer in all calls (for single thread
+it's not needed). This version doesn't do any actual data copying. Data copying
+has to be done somewhere else, e.g. inline in the generated code. Due to this,
+this function doesn't have any barrier at the end of the function, like
+__kmpc_copyprivate does, so generated code needs barrier after copying of all
+data was done.
+*/
+void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
+ void **data_ptr;
+
+ KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
+
+ KMP_MB();
+
+ data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
+
+ if (__kmp_env_consistency_check) {
+ if (loc == 0) {
+ KMP_WARNING(ConstructIdentInvalid);
+ }
+ }
+
+ // ToDo: Optimize the following barrier
+
+ if (cpy_data)
+ *data_ptr = cpy_data;
+
+#if OMPT_SUPPORT
+ ompt_frame_t *ompt_frame;
+ if (ompt_enabled.enabled) {
+ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+ OMPT_STORE_RETURN_ADDRESS(gtid);
+ }
+#endif
+/* This barrier is not a barrier region boundary */
+#if USE_ITT_NOTIFY
+ __kmp_threads[gtid]->th.th_ident = loc;
+#endif
+ __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+
+ return *data_ptr;
+}
+
/* -------------------------------------------------------------------------- */
#define INIT_LOCK __kmp_init_user_lock_with_checks
@@ -4348,7 +4418,7 @@ void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
omp_allocator_handle_t free_allocator) {
return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
- free_allocator);
+ free_allocator);
}
void omp_free(void *ptr, omp_allocator_handle_t allocator) {
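For reference, a minimal sketch (not part of this diff) of the call pattern compiler-generated code might use to drive the new __kmpc_copyprivate_light entry point for "#pragma omp single copyprivate(x)". Per the comment above, the routine only publishes the pointer and synchronizes; the copy itself and the trailing barrier are assumed to be emitted inline, and the names lowered_single_copyprivate, did_single and src are hypothetical.

// Sketch only: assumes the declarations from kmp.h are visible.
#include "kmp.h"

static void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 did_single = __kmpc_single(loc, gtid);
  if (did_single) {
    *x = 42;                        // body of the single region
    __kmpc_end_single(loc, gtid);
  }
  // Every thread calls in; only the thread that executed single passes a
  // non-NULL pointer.  The barrier inside the call publishes it to all.
  int *src = (int *)__kmpc_copyprivate_light(loc, gtid, did_single ? x : NULL);
  if (!did_single)
    *x = *src;                      // copying happens here, not in the runtime
  // The light version has no barrier after copying, so one is needed before
  // the source copy may be reused or go out of scope.
  __kmpc_barrier(loc, gtid);
}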
diff --git a/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp b/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp
index f3407bf889..8acf3d429e 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp
@@ -1964,9 +1964,22 @@ int __kmp_dispatch_next_algorithm(int gtid,
&(task_info->task_data), 0, codeptr); \
} \
}
+#define OMPT_LOOP_DISPATCH(lb, ub, st, status) \
+ if (ompt_enabled.ompt_callback_dispatch && status) { \
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
+ ompt_dispatch_chunk_t chunk; \
+ ompt_data_t instance = ompt_data_none; \
+ OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st); \
+ instance.ptr = &chunk; \
+ ompt_callbacks.ompt_callback(ompt_callback_dispatch)( \
+ &(team_info->parallel_data), &(task_info->task_data), \
+ ompt_dispatch_ws_loop_chunk, instance); \
+ }
// TODO: implement count
#else
#define OMPT_LOOP_END // no-op
+#define OMPT_LOOP_DISPATCH(lb, ub, st, status) // no-op
#endif
#if KMP_STATS_ENABLED
@@ -2142,6 +2155,7 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
#if INCLUDE_SSC_MARKS
SSC_MARK_DISPATCH_NEXT();
#endif
+ OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
OMPT_LOOP_END;
KMP_STATS_LOOP_END;
return status;
@@ -2265,11 +2279,225 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
#if INCLUDE_SSC_MARKS
SSC_MARK_DISPATCH_NEXT();
#endif
+ OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
OMPT_LOOP_END;
KMP_STATS_LOOP_END;
return status;
}
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+@return Zero if the parallel region is not active and this thread should execute
+all sections, non-zero otherwise.
+
+Beginning of sections construct.
+There are no implicit barriers in the "sections" calls, rather the compiler
+should introduce an explicit barrier if it is required.
+
+This implementation is based on __kmp_dispatch_init, using same constructs for
+shared data (we can't have sections nested directly in omp for loop, there
+should be a parallel region in between)
+*/
+kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) {
+
+ int active;
+ kmp_info_t *th;
+ kmp_team_t *team;
+ kmp_uint32 my_buffer_index;
+ dispatch_shared_info_template<kmp_int32> volatile *sh;
+
+ KMP_DEBUG_ASSERT(__kmp_init_serial);
+
+ if (!TCR_4(__kmp_init_parallel))
+ __kmp_parallel_initialize();
+ __kmp_resume_if_soft_paused();
+
+ /* setup data */
+ th = __kmp_threads[gtid];
+ team = th->th.th_team;
+ active = !team->t.t_serialized;
+ th->th.th_ident = loc;
+
+ KMP_COUNT_BLOCK(OMP_SECTIONS);
+ KD_TRACE(10, ("__kmpc_sections: called by T#%d\n", gtid));
+
+ if (active) {
+ // Setup sections in the same way as dynamic scheduled loops.
+ // We need one shared data: which section is to execute next.
+ // (in case parallel is not active, all sections will be executed on the
+ // same thread)
+ KMP_DEBUG_ASSERT(th->th.th_dispatch ==
+ &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
+
+ my_buffer_index = th->th.th_dispatch->th_disp_index++;
+
+ // reuse shared data structures from dynamic sched loops:
+ sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
+ &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
+ KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
+ my_buffer_index));
+
+ th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
+ th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
+
+ KD_TRACE(100, ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
+ "sh->buffer_index:%d\n",
+ gtid, my_buffer_index, sh->buffer_index));
+ __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
+ __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
+ // Note: KMP_WAIT() cannot be used there: buffer index and
+ // my_buffer_index are *always* 32-bit integers.
+ KMP_MB();
+ KD_TRACE(100, ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
+ "sh->buffer_index:%d\n",
+ gtid, my_buffer_index, sh->buffer_index));
+
+ th->th.th_dispatch->th_dispatch_pr_current =
+ nullptr; // sections construct doesn't need private data
+ th->th.th_dispatch->th_dispatch_sh_current =
+ CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
+ }
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_work) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ ompt_callbacks.ompt_callback(ompt_callback_work)(
+ ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
+ &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+ }
+#endif
+ KMP_PUSH_PARTITIONED_TIMER(OMP_sections);
+
+ return active;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+@param numberOfSections number of sections in the 'sections' construct
+@return unsigned [from 0 to n) - number (id) of the section to execute next on
+this thread. n (or any other number not in range) - nothing to execute on this
+thread
+*/
+
+kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid,
+ kmp_int32 numberOfSections) {
+
+ KMP_TIME_PARTITIONED_BLOCK(OMP_sections);
+
+ kmp_info_t *th = __kmp_threads[gtid];
+#ifdef KMP_DEBUG
+ kmp_team_t *team = th->th.th_team;
+#endif
+
+ KD_TRACE(1000, ("__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid,
+ numberOfSections));
+
+ // For serialized case we should not call this function:
+ KMP_DEBUG_ASSERT(!team->t.t_serialized);
+
+ dispatch_shared_info_template<kmp_int32> volatile *sh;
+
+ KMP_DEBUG_ASSERT(th->th.th_dispatch ==
+ &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
+
+ KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current));
+ sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
+ th->th.th_dispatch->th_dispatch_sh_current);
+ KMP_DEBUG_ASSERT(sh);
+
+ kmp_int32 sectionIndex = 0;
+ bool moreSectionsToExecute = true;
+
+ // Find section to execute:
+ sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
+ if (sectionIndex >= numberOfSections) {
+ moreSectionsToExecute = false;
+ }
+
+ // status == 0: no more sections to execute;
+ // OMPTODO: __kmpc_end_sections could be bypassed?
+ if (!moreSectionsToExecute) {
+ kmp_int32 num_done;
+
+ num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));
+
+ if (num_done == th->th.th_team_nproc - 1) {
+ /* NOTE: release this buffer to be reused */
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ sh->u.s.num_done = 0;
+ sh->u.s.iteration = 0;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ sh->buffer_index += __kmp_dispatch_num_buffers;
+ KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,
+ sh->buffer_index));
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ } // if
+
+ th->th.th_dispatch->th_deo_fcn = NULL;
+ th->th.th_dispatch->th_dxo_fcn = NULL;
+ th->th.th_dispatch->th_dispatch_sh_current = NULL;
+ th->th.th_dispatch->th_dispatch_pr_current = NULL;
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_dispatch) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ ompt_data_t instance = ompt_data_none;
+ instance.ptr = OMPT_GET_RETURN_ADDRESS(0);
+ ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
+ &(team_info->parallel_data), &(task_info->task_data),
+ ompt_dispatch_section, instance);
+ }
+#endif
+ KMP_POP_PARTITIONED_TIMER();
+ }
+
+ return sectionIndex;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+
+End of "sections" construct.
+Don't need to wait here: barrier is added separately when needed.
+*/
+void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {
+
+ kmp_info_t *th = __kmp_threads[gtid];
+ int active = !th->th.th_team->t.t_serialized;
+
+ KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));
+
+ if (!active) {
+ // In active case call finalization is done in __kmpc_next_section
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_work) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ ompt_callbacks.ompt_callback(ompt_callback_work)(
+ ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
+ &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+ }
+#endif
+ KMP_POP_PARTITIONED_TIMER();
+ }
+
+ KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
+}
+
template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
kmp_int32 *plastiter, T *plower, T *pupper,
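For reference, a rough sketch (not part of this diff) of how a compiler might lower "#pragma omp sections" onto the three entry points added above. The two placeholder section bodies, the helper name lowered_sections and the closing __kmpc_barrier for the non-nowait case are assumptions.

// Sketch only: assumes the declarations from kmp.h are visible.
#include "kmp.h"

static void lowered_sections(ident_t *loc, kmp_int32 gtid) {
  enum { kNumSections = 2 };
  if (__kmpc_sections_init(loc, gtid)) {
    // Parallel region is active: pull section ids from the shared counter
    // until an out-of-range id says there is nothing left for this thread.
    kmp_int32 id;
    while ((id = __kmpc_next_section(loc, gtid, kNumSections)) < kNumSections) {
      switch (id) {
      case 0: /* section 0 body */ break;
      case 1: /* section 1 body */ break;
      }
    }
  } else {
    // Serialized: this thread runs every section itself and, per the
    // assertion above, must not call __kmpc_next_section.
    /* section 0 body */
    /* section 1 body */
  }
  __kmpc_end_sections(loc, gtid);
  __kmpc_barrier(loc, gtid); // explicit barrier; the sections calls add none
}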
diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h b/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h
index bf9ebf9b2e..6b332244c6 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h
@@ -238,6 +238,10 @@ int FTN_STDCALL FTN_GET_AFFINITY(void **mask) {
__kmp_middle_initialize();
}
__kmp_assign_root_init_mask();
+ int gtid = __kmp_get_gtid();
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
return __kmp_aux_get_affinity(mask);
#endif
}
@@ -358,9 +362,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_THREADS)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
gtid = __kmp_entry_gtid();
thread = __kmp_threads[gtid];
+#if KMP_AFFINITY_SUPPORTED
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ __kmp_assign_root_init_mask();
+ }
+#endif
// return thread -> th.th_team -> t.t_current_task[
// thread->th.th_info.ds.ds_tid ] -> icvs.nproc;
return thread->th.th_current_task->td_icvs.nproc;
@@ -509,6 +517,11 @@ void FTN_STDCALL KMP_EXPAND_NAME_IF_APPEND(FTN_DISPLAY_AFFINITY)(
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
+#if KMP_AFFINITY_SUPPORTED
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
ConvertedString cformat(format, size);
__kmp_aux_display_affinity(gtid, cformat.get());
#endif
@@ -537,6 +550,11 @@ size_t FTN_STDCALL KMP_EXPAND_NAME_IF_APPEND(FTN_CAPTURE_AFFINITY)(
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
+#if KMP_AFFINITY_SUPPORTED
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
__kmp_str_buf_init(&capture_buf);
ConvertedString cformat(format, for_size);
num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf);
@@ -612,7 +630,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PROCS)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
+#if KMP_AFFINITY_SUPPORTED
+ if (!__kmp_affin_reset) {
+ // only bind root here if its affinity reset is not requested
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread->th.th_team->t.t_level == 0) {
+ __kmp_assign_root_init_mask();
+ }
+ }
+#endif
return __kmp_avail_proc;
#endif
}
@@ -802,9 +829,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PLACES)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
+ if (!__kmp_affin_reset) {
+ // only bind root here if its affinity reset is not requested
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread->th.th_team->t.t_level == 0) {
+ __kmp_assign_root_init_mask();
+ }
+ }
return __kmp_affinity_num_masks;
#endif
}
@@ -818,9 +852,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM_PROCS)(int place_num) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
+ if (!__kmp_affin_reset) {
+ // only bind root here if its affinity reset is not requested
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread->th.th_team->t.t_level == 0) {
+ __kmp_assign_root_init_mask();
+ }
+ }
if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
return 0;
kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
@@ -844,9 +885,16 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_PROC_IDS)(int place_num,
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return;
+ if (!__kmp_affin_reset) {
+ // only bind root here if its affinity reset is not requested
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread->th.th_team->t.t_level == 0) {
+ __kmp_assign_root_init_mask();
+ }
+ }
if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
return;
kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
@@ -870,11 +918,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return -1;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ __kmp_assign_root_init_mask();
+ }
if (thread->th.th_current_place < 0)
return -1;
return thread->th.th_current_place;
@@ -890,11 +940,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_NUM_PLACES)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ __kmp_assign_root_init_mask();
+ }
first_place = thread->th.th_first_place;
last_place = thread->th.th_last_place;
if (first_place < 0 || last_place < 0)
@@ -917,11 +969,13 @@ KMP_EXPAND_NAME(FTN_GET_PARTITION_PLACE_NUMS)(int *place_nums) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ __kmp_assign_root_init_mask();
+ }
first_place = thread->th.th_first_place;
last_place = thread->th.th_last_place;
if (first_place < 0 || last_place < 0)
@@ -1567,6 +1621,15 @@ void FTN_STDCALL FTN_DISPLAY_ENV(int verbose) {
#endif
}
+int FTN_STDCALL FTN_IN_EXPLICIT_TASK(void) {
+#ifdef KMP_STUB
+ return 0;
+#else
+ int gtid = __kmp_entry_gtid();
+ return __kmp_thread_from_gtid(gtid)->th.th_current_task->td_flags.tasktype;
+#endif
+}
+
// GCC compatibility (versioned symbols)
#ifdef KMP_USE_VERSION_SYMBOLS
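For reference, FTN_IN_EXPLICIT_TASK added above backs the OpenMP 5.2 routine omp_in_explicit_task(), which reports the tasktype flag of the current task. A small user-level sketch (not from this diff, and assuming the updated omp.h declares the routine) of the expected behaviour:

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel num_threads(2)
  {
#pragma omp single
    {
      // Inside the implicit task of the single construct: expected 0.
      printf("in explicit task? %d\n", omp_in_explicit_task());
#pragma omp task
      {
        // Inside an explicit task: expected 1.
        printf("in explicit task? %d\n", omp_in_explicit_task());
      }
#pragma omp taskwait
    }
  }
  return 0;
}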
diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h b/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h
index 66e1e1ecd2..d37c9c8602 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h
@@ -134,6 +134,7 @@
#define FTN_PAUSE_RESOURCE_ALL omp_pause_resource_all
#define FTN_GET_SUPPORTED_ACTIVE_LEVELS omp_get_supported_active_levels
#define FTN_DISPLAY_ENV omp_display_env
+#define FTN_IN_EXPLICIT_TASK omp_in_explicit_task
#define FTN_FULFILL_EVENT omp_fulfill_event
#define FTN_SET_NUM_TEAMS omp_set_num_teams
#define FTN_GET_MAX_TEAMS omp_get_max_teams
@@ -270,6 +271,7 @@
#define FTN_PAUSE_RESOURCE_ALL omp_pause_resource_all_
#define FTN_GET_SUPPORTED_ACTIVE_LEVELS omp_get_supported_active_levels_
#define FTN_DISPLAY_ENV omp_display_env_
+#define FTN_IN_EXPLICIT_TASK omp_in_explicit_task_
#define FTN_FULFILL_EVENT omp_fulfill_event_
#define FTN_SET_NUM_TEAMS omp_set_num_teams_
#define FTN_GET_MAX_TEAMS omp_get_max_teams_
@@ -404,6 +406,7 @@
#define FTN_PAUSE_RESOURCE_ALL OMP_PAUSE_RESOURCE_ALL
#define FTN_GET_SUPPORTED_ACTIVE_LEVELS OMP_GET_SUPPORTED_ACTIVE_LEVELS
#define FTN_DISPLAY_ENV OMP_DISPLAY_ENV
+#define FTN_IN_EXPLICIT_TASK OMP_IN_EXPLICIT_TASK
#define FTN_FULFILL_EVENT OMP_FULFILL_EVENT
#define FTN_SET_NUM_TEAMS OMP_SET_NUM_TEAMS
#define FTN_GET_MAX_TEAMS OMP_GET_MAX_TEAMS
@@ -540,6 +543,7 @@
#define FTN_PAUSE_RESOURCE_ALL OMP_PAUSE_RESOURCE_ALL_
#define FTN_GET_SUPPORTED_ACTIVE_LEVELS OMP_GET_SUPPORTED_ACTIVE_LEVELS_
#define FTN_DISPLAY_ENV OMP_DISPLAY_ENV_
+#define FTN_IN_EXPLICIT_TASK OMP_IN_EXPLICIT_TASK_
#define FTN_FULFILL_EVENT OMP_FULFILL_EVENT_
#define FTN_SET_NUM_TEAMS OMP_SET_NUM_TEAMS_
#define FTN_GET_MAX_TEAMS OMP_GET_MAX_TEAMS_
diff --git a/contrib/libs/cxxsupp/openmp/kmp_global.cpp b/contrib/libs/cxxsupp/openmp/kmp_global.cpp
index 62bdac3c4b..04b63c72d6 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_global.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_global.cpp
@@ -44,6 +44,7 @@ tsc_tick_count __kmp_stats_start_time;
volatile int __kmp_init_serial = FALSE;
volatile int __kmp_init_gtid = FALSE;
volatile int __kmp_init_common = FALSE;
+volatile int __kmp_need_register_serial = TRUE;
volatile int __kmp_init_middle = FALSE;
volatile int __kmp_init_parallel = FALSE;
volatile int __kmp_init_hidden_helper = FALSE;
@@ -154,6 +155,7 @@ int __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LAST + 1];
kmp_hier_sched_env_t __kmp_hier_scheds = {0, 0, NULL, NULL, NULL};
#endif
int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
+bool __kmp_wpolicy_passive = false;
#if KMP_USE_MONITOR
int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS;
int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(KMP_DEFAULT_BLOCKTIME,
@@ -283,6 +285,7 @@ kmp_affin_mask_t *__kmp_affinity_masks = NULL;
unsigned __kmp_affinity_num_masks = 0;
char *__kmp_cpuinfo_file = NULL;
+bool __kmp_affin_reset = 0;
#endif /* KMP_AFFINITY_SUPPORTED */
@@ -316,7 +319,6 @@ omp_allocator_handle_t const omp_pteam_mem_alloc =
(omp_allocator_handle_t const)7;
omp_allocator_handle_t const omp_thread_mem_alloc =
(omp_allocator_handle_t const)8;
-// Preview of target memory support
omp_allocator_handle_t const llvm_omp_target_host_mem_alloc =
(omp_allocator_handle_t const)100;
omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
@@ -337,7 +339,6 @@ omp_memspace_handle_t const omp_high_bw_mem_space =
(omp_memspace_handle_t const)3;
omp_memspace_handle_t const omp_low_lat_mem_space =
(omp_memspace_handle_t const)4;
-// Preview of target memory support
omp_memspace_handle_t const llvm_omp_target_host_mem_space =
(omp_memspace_handle_t const)100;
omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
@@ -426,7 +427,13 @@ int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */
// 0 = never yield;
// 1 = always yield (default);
// 2 = yield only if oversubscribed
+#if KMP_OS_DARWIN && KMP_ARCH_AARCH64
+// Set to 0 for environments where yield is slower
+kmp_int32 __kmp_use_yield = 0;
+#else
kmp_int32 __kmp_use_yield = 1;
+#endif
+
// This will be 1 if KMP_USE_YIELD environment variable was set explicitly
kmp_int32 __kmp_use_yield_exp_set = 0;
@@ -443,6 +450,7 @@ kmp_uint64 __kmp_pause_init = 1; // for tpause
KMP_ALIGN_CACHE
kmp_info_t **__kmp_threads = NULL;
kmp_root_t **__kmp_root = NULL;
+kmp_old_threads_list_t *__kmp_old_threads_list = NULL;
/* data read/written to often by primary threads */
KMP_ALIGN_CACHE
diff --git a/contrib/libs/cxxsupp/openmp/kmp_lock.cpp b/contrib/libs/cxxsupp/openmp/kmp_lock.cpp
index fff7305b57..8fcddc7108 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_lock.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_lock.cpp
@@ -1954,7 +1954,7 @@ static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
// We need a fence here, since we must ensure that no memory operations
// from later in this thread float above that read.
-#if KMP_COMPILER_ICC
+#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
_mm_mfence();
#else
__sync_synchronize();
diff --git a/contrib/libs/cxxsupp/openmp/kmp_os.h b/contrib/libs/cxxsupp/openmp/kmp_os.h
index d71e9aecb3..02efaa1b26 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_os.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_os.h
@@ -17,6 +17,7 @@
#include <atomic>
#include <stdarg.h>
#include <stdlib.h>
+#include <string.h>
#define KMP_FTN_PLAIN 1
#define KMP_FTN_APPEND 2
@@ -53,8 +54,12 @@
#define KMP_COMPILER_GCC 0
#define KMP_COMPILER_CLANG 0
#define KMP_COMPILER_MSVC 0
+#define KMP_COMPILER_ICX 0
-#if defined(__INTEL_COMPILER)
+#if __INTEL_CLANG_COMPILER
+#undef KMP_COMPILER_ICX
+#define KMP_COMPILER_ICX 1
+#elif defined(__INTEL_COMPILER)
#undef KMP_COMPILER_ICC
#define KMP_COMPILER_ICC 1
#elif defined(__clang__)
@@ -82,10 +87,16 @@
#define KMP_GROUP_AFFINITY 0
#endif
+#if (KMP_OS_LINUX || (KMP_OS_FREEBSD && __FreeBSD_version >= 1301000))
+#define KMP_HAVE_SCHED_GETCPU 1
+#else
+#define KMP_HAVE_SCHED_GETCPU 0
+#endif
+
/* Check for quad-precision extension. */
#define KMP_HAVE_QUAD 0
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-#if KMP_COMPILER_ICC
+#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
/* _Quad is already defined for icc */
#undef KMP_HAVE_QUAD
#define KMP_HAVE_QUAD 1
@@ -334,6 +345,9 @@ extern "C" {
// Use a function like macro to imply that it must be followed by a semicolon
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
+// icc cannot properly tell this attribute is absent so force off
+#elif KMP_COMPILER_ICC
+#define KMP_FALLTHROUGH() ((void)0)
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
@@ -448,8 +462,10 @@ enum kmp_mem_fence_type {
#pragma intrinsic(InterlockedExchangeAdd)
#pragma intrinsic(InterlockedCompareExchange)
#pragma intrinsic(InterlockedExchange)
+#if !(KMP_COMPILER_ICX && KMP_32_BIT_ARCH)
#pragma intrinsic(InterlockedExchange64)
#endif
+#endif
// Using InterlockedIncrement / InterlockedDecrement causes a library loading
// ordering problem, so we use InterlockedExchangeAdd instead.
@@ -842,8 +858,14 @@ static inline bool mips_sync_val_compare_and_swap(volatile kmp_uint64 *p,
(kmp_uint64)(sv))
#endif
+#if KMP_OS_DARWIN && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800
+#define KMP_XCHG_FIXED8(p, v) \
+ __atomic_exchange_1((volatile kmp_uint8 *)(p), (kmp_uint8)(v), \
+ __ATOMIC_SEQ_CST)
+#else
#define KMP_XCHG_FIXED8(p, v) \
__sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v))
+#endif
#define KMP_XCHG_FIXED16(p, v) \
__sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v))
#define KMP_XCHG_FIXED32(p, v) \
@@ -852,15 +874,25 @@ static inline bool mips_sync_val_compare_and_swap(volatile kmp_uint64 *p,
__sync_lock_test_and_set((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) {
- kmp_int32 tmp =
- __sync_lock_test_and_set((volatile kmp_uint32 *)(p), *(kmp_uint32 *)&v);
- return *(kmp_real32 *)&tmp;
+ volatile kmp_uint32 *up;
+ kmp_uint32 uv;
+ memcpy(&up, &p, sizeof(up));
+ memcpy(&uv, &v, sizeof(uv));
+ kmp_int32 tmp = __sync_lock_test_and_set(up, uv);
+ kmp_real32 ftmp;
+ memcpy(&ftmp, &tmp, sizeof(tmp));
+ return ftmp;
}
inline kmp_real64 KMP_XCHG_REAL64(volatile kmp_real64 *p, kmp_real64 v) {
- kmp_int64 tmp =
- __sync_lock_test_and_set((volatile kmp_uint64 *)(p), *(kmp_uint64 *)&v);
- return *(kmp_real64 *)&tmp;
+ volatile kmp_uint64 *up;
+ kmp_uint64 uv;
+ memcpy(&up, &p, sizeof(up));
+ memcpy(&uv, &v, sizeof(uv));
+ kmp_int64 tmp = __sync_lock_test_and_set(up, uv);
+ kmp_real64 dtmp;
+ memcpy(&dtmp, &tmp, sizeof(tmp));
+ return dtmp;
}
#else
@@ -1026,7 +1058,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-#if KMP_COMPILER_ICC
+#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#define KMP_MFENCE_() _mm_mfence()
#define KMP_SFENCE_() _mm_sfence()
#elif KMP_COMPILER_MSVC
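
The KMP_XCHG_REAL32/KMP_XCHG_REAL64 hunk above replaces pointer-cast type punning with memcpy. As a minimal illustration of why (the memcpy form avoids strict-aliasing undefined behaviour and still typically compiles to a plain register move), here is a standalone sketch; the helper names are invented for this example and are not part of the runtime:

    #include <cstdint>
    #include <cstring>

    // Reinterpret a float's bit pattern without violating strict aliasing,
    // mirroring the memcpy-based rewrite of KMP_XCHG_REAL32 above.
    static inline uint32_t float_bits(float f) {
      uint32_t u;
      memcpy(&u, &f, sizeof(u));
      return u;
    }

    static inline float bits_float(uint32_t u) {
      float f;
      memcpy(&f, &u, sizeof(f));
      return f;
    }
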
diff --git a/contrib/libs/cxxsupp/openmp/kmp_runtime.cpp b/contrib/libs/cxxsupp/openmp/kmp_runtime.cpp
index 34f8a01743..bfbff03bd6 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_runtime.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_runtime.cpp
@@ -2222,11 +2222,11 @@ int __kmp_fork_call(ident_t *loc, int gtid,
} else
// only one notification scheme (either "submit" or "forking/joined", not both)
#endif /* USE_ITT_NOTIFY */
- if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
- __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
- // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
- __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
- }
+ if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
+ __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
+ // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
+ __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
+ }
}
#endif /* USE_ITT_BUILD */
@@ -2641,6 +2641,11 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
+#if KMP_AFFINITY_SUPPORTED
+ if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
#if OMPT_SUPPORT
int flags =
OMPT_INVOKER(fork_context) |
@@ -3276,7 +3281,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
__kmp_nested_proc_bind.bind_types[0], &r_icvs,
0 // argc
USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
- );
+ );
#if USE_DEBUGGER
// Non-NULL value should be assigned to make the debugger display the root
// team.
@@ -3313,7 +3318,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
__kmp_nested_proc_bind.bind_types[0], &r_icvs,
0 // argc
USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
- );
+ );
KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
root->r.r_hot_team = hot_team;
@@ -3669,11 +3674,16 @@ static int __kmp_expand_threads(int nNeed) {
__kmp_threads_capacity * sizeof(kmp_info_t *));
KMP_MEMCPY(newRoot, __kmp_root,
__kmp_threads_capacity * sizeof(kmp_root_t *));
+ // Put the old __kmp_threads array on a list. Any ongoing references to the
+ // old array remain valid. The list is cleaned up at library shutdown.
+ kmp_old_threads_list_t *node =
+ (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
+ node->threads = __kmp_threads;
+ node->next = __kmp_old_threads_list;
+ __kmp_old_threads_list = node;
- kmp_info_t **temp_threads = __kmp_threads;
*(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
*(kmp_root_t * *volatile *)&__kmp_root = newRoot;
- __kmp_free(temp_threads);
added += newCapacity - __kmp_threads_capacity;
*(volatile int *)&__kmp_threads_capacity = newCapacity;
@@ -6960,10 +6970,12 @@ static void __kmp_do_serial_initialize(void) {
/* Initialize internal memory allocator */
__kmp_init_allocator();
- /* Register the library startup via an environment variable and check to see
- whether another copy of the library is already registered. */
-
- __kmp_register_library_startup();
+ /* Register the library startup via an environment variable or via mapped
+ shared memory file and check to see whether another copy of the library is
+ already registered. Since a forked child process is often terminated, we
+ postpone the registration until middle initialization in the child. */
+ if (__kmp_need_register_serial)
+ __kmp_register_library_startup();
/* TODO reinitialization of library */
if (TCR_4(__kmp_global.g.g_done)) {
@@ -7250,6 +7262,12 @@ static void __kmp_do_middle_initialize(void) {
KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
+ if (UNLIKELY(!__kmp_need_register_serial)) {
+ // We are in a forked child process. The registration was skipped during
+ // serial initialization in __kmp_atfork_child handler. Do it here.
+ __kmp_register_library_startup();
+ }
+
// Save the previous value for the __kmp_dflt_team_nth so that
// we can avoid some reinitialization if it hasn't changed.
prev_dflt_team_nth = __kmp_dflt_team_nth;
@@ -8101,6 +8119,15 @@ void __kmp_cleanup(void) {
__kmp_root = NULL;
__kmp_threads_capacity = 0;
+ // Free old __kmp_threads arrays if they exist.
+ kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
+ while (ptr) {
+ kmp_old_threads_list_t *next = ptr->next;
+ __kmp_free(ptr->threads);
+ __kmp_free(ptr);
+ ptr = next;
+ }
+
#if KMP_USE_DYNAMIC_LOCK
__kmp_cleanup_indirect_user_locks();
#else
@@ -8286,7 +8313,7 @@ void __kmp_aux_set_library(enum library_type arg) {
break;
case library_throughput:
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
- __kmp_dflt_blocktime = 200;
+ __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
break;
default:
KMP_FATAL(UnknownLibraryType, arg);
@@ -8707,7 +8734,8 @@ __kmp_determine_reduction_method(
KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
- ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
+ (loc && \
+ ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
retval = critical_reduce_block;
@@ -8953,19 +8981,16 @@ void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
}
// Release all the workers
- kmp_uint64 new_value; // new value for go
- new_value = team->t.b->go_release();
+ team->t.b->go_release();
KMP_MFENCE();
// Workers should see transition status 2 and move to 0; but may need to be
// woken up first
- size_t my_go_index;
int count = old_nthreads - 1;
while (count > 0) {
count = old_nthreads - 1;
for (int f = 1; f < old_nthreads; ++f) {
- my_go_index = f / team->t.b->threads_per_go;
if (other_threads[f]->th.th_used_in_team.load() != 0) {
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
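
The __kmp_expand_threads hunk above stops freeing the old __kmp_threads array immediately and instead parks it on __kmp_old_threads_list until __kmp_cleanup, so any thread still holding the old pointer keeps reading valid memory. A simplified sketch of that retire-instead-of-free pattern, with invented names and none of the runtime's locking or volatile publication details:

    #include <cstdlib>
    #include <cstring>

    struct retired_block_t {
      void **storage;
      retired_block_t *next;
    };

    static void **table = nullptr;              // stands in for __kmp_threads
    static int capacity = 0;
    static retired_block_t *retired = nullptr;  // freed only at shutdown

    static void expand_table(int new_capacity) {
      void **bigger = (void **)calloc(new_capacity, sizeof(void *));
      if (table)
        memcpy(bigger, table, capacity * sizeof(void *));
      // Park the old array instead of freeing it: concurrent readers that
      // already loaded the old pointer continue to dereference valid memory.
      retired_block_t *node = (retired_block_t *)malloc(sizeof(retired_block_t));
      node->storage = table;
      node->next = retired;
      retired = node;
      table = bigger;                           // publish the larger array
      capacity = new_capacity;
    }
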
diff --git a/contrib/libs/cxxsupp/openmp/kmp_sched.cpp b/contrib/libs/cxxsupp/openmp/kmp_sched.cpp
index 09e497e029..acd75448d2 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_sched.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_sched.cpp
@@ -101,7 +101,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
static kmp_int8 warn = 0;
- if (ompt_enabled.ompt_callback_work) {
+ if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
// Only fully initialize variables needed by OMPT if OMPT is enabled.
team_info = __ompt_get_teaminfo(0, NULL);
task_info = __ompt_get_task_info_object(0);
@@ -194,8 +194,13 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
// we are in DISTRIBUTE construct
schedtype += kmp_sch_static -
kmp_distribute_static; // AC: convert to usual schedule type
- tid = th->th.th_team->t.t_master_tid;
- team = th->th.th_team->t.t_parent;
+ if (th->th.th_team->t.t_serialized > 1) {
+ tid = 0;
+ team = th->th.th_team;
+ } else {
+ tid = th->th.th_team->t.t_master_tid;
+ team = th->th.th_team->t.t_parent;
+ }
} else {
tid = __kmp_tid_from_gtid(global_tid);
team = th->th.th_team;
@@ -433,6 +438,24 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), trip_count, codeptr);
}
+ if (ompt_enabled.ompt_callback_dispatch) {
+ ompt_dispatch_t dispatch_type;
+ ompt_data_t instance = ompt_data_none;
+ ompt_dispatch_chunk_t dispatch_chunk;
+ if (ompt_work_type == ompt_work_sections) {
+ dispatch_type = ompt_dispatch_section;
+ instance.ptr = codeptr;
+ } else {
+ OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
+ dispatch_type = (ompt_work_type == ompt_work_distribute)
+ ? ompt_dispatch_distribute_chunk
+ : ompt_dispatch_ws_loop_chunk;
+ instance.ptr = &dispatch_chunk;
+ }
+ ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
+ &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
+ instance);
+ }
#endif
KMP_STATS_LOOP_END(OMP_loop_static_iterations);
@@ -445,7 +468,12 @@ static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
T *plower, T *pupper, T *pupperDist,
typename traits_t<T>::signed_t *pstride,
typename traits_t<T>::signed_t incr,
- typename traits_t<T>::signed_t chunk) {
+ typename traits_t<T>::signed_t chunk
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ ,
+ void *codeptr
+#endif
+) {
KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
@@ -677,6 +705,26 @@ end:;
}
#endif
KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ if (ompt_enabled.ompt_callback_work) {
+ ompt_callbacks.ompt_callback(ompt_callback_work)(
+ ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
+ &(task_info->task_data), 0, codeptr);
+ }
+ if (ompt_enabled.ompt_callback_dispatch) {
+ ompt_data_t instance = ompt_data_none;
+ ompt_dispatch_chunk_t dispatch_chunk;
+ OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
+ instance.ptr = &dispatch_chunk;
+ ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
+ &(team_info->parallel_data), &(task_info->task_data),
+ ompt_dispatch_distribute_chunk, instance);
+ }
+ }
+#endif // OMPT_SUPPORT && OMPT_OPTIONAL
KMP_STATS_LOOP_END(OMP_distribute_iterations);
return;
}
@@ -882,6 +930,12 @@ void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
@}
*/
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
+#else
+#define OMPT_CODEPTR_ARG
+#endif
+
/*!
@ingroup WORK_SHARING
@param loc Source code location
@@ -910,7 +964,8 @@ void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
kmp_int32 *pupperD, kmp_int32 *pstride,
kmp_int32 incr, kmp_int32 chunk) {
__kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
- pupper, pupperD, pstride, incr, chunk);
+ pupper, pupperD, pstride, incr,
+ chunk OMPT_CODEPTR_ARG);
}
/*!
@@ -922,7 +977,8 @@ void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
kmp_uint32 *pupperD, kmp_int32 *pstride,
kmp_int32 incr, kmp_int32 chunk) {
__kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
- pupper, pupperD, pstride, incr, chunk);
+ pupper, pupperD, pstride, incr,
+ chunk OMPT_CODEPTR_ARG);
}
/*!
@@ -934,7 +990,8 @@ void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
kmp_int64 *pupperD, kmp_int64 *pstride,
kmp_int64 incr, kmp_int64 chunk) {
__kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
- pupper, pupperD, pstride, incr, chunk);
+ pupper, pupperD, pstride, incr,
+ chunk OMPT_CODEPTR_ARG);
}
/*!
@@ -946,7 +1003,8 @@ void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
kmp_uint64 *pupperD, kmp_int64 *pstride,
kmp_int64 incr, kmp_int64 chunk) {
__kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
- pupper, pupperD, pstride, incr, chunk);
+ pupper, pupperD, pstride, incr,
+ chunk OMPT_CODEPTR_ARG);
}
/*!
@}
diff --git a/contrib/libs/cxxsupp/openmp/kmp_settings.cpp b/contrib/libs/cxxsupp/openmp/kmp_settings.cpp
index 112502fdce..38ff15461b 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_settings.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_settings.cpp
@@ -812,6 +812,7 @@ static void __kmp_stg_parse_wait_policy(char const *name, char const *value,
}
} else if (__kmp_str_match("PASSIVE", 1, value)) {
__kmp_library = library_throughput;
+ __kmp_wpolicy_passive = true; /* allow sleep while active tasking */
if (blocktime_str == NULL) {
// KMP_BLOCKTIME not specified, so set default to 0.
__kmp_dflt_blocktime = 0;
@@ -1245,13 +1246,25 @@ static void __kmp_stg_parse_num_hidden_helper_threads(char const *name,
// task
if (__kmp_hidden_helper_threads_num == 0) {
__kmp_enable_hidden_helper = FALSE;
+ } else {
+ // Since the main thread of the hidden helper team does not participate
+ // in task execution, increment the number of threads by one so that the
+ // requested number of threads do the actual work.
+ __kmp_hidden_helper_threads_num++;
}
} // __kmp_stg_parse_num_hidden_helper_threads
static void __kmp_stg_print_num_hidden_helper_threads(kmp_str_buf_t *buffer,
char const *name,
void *data) {
- __kmp_stg_print_int(buffer, name, __kmp_hidden_helper_threads_num);
+ if (__kmp_hidden_helper_threads_num == 0) {
+ __kmp_stg_print_int(buffer, name, __kmp_hidden_helper_threads_num);
+ } else {
+ KMP_DEBUG_ASSERT(__kmp_hidden_helper_threads_num > 1);
+ // Exclude the main thread of the hidden helper team and print the
+ // number of worker threads that do the actual work.
+ __kmp_stg_print_int(buffer, name, __kmp_hidden_helper_threads_num - 1);
+ }
} // __kmp_stg_print_num_hidden_helper_threads
static void __kmp_stg_parse_use_hidden_helper(char const *name,
@@ -2156,6 +2169,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
int respect = 0;
int gran = 0;
int dups = 0;
+ int reset = 0;
bool set = false;
KMP_ASSERT(value != NULL);
@@ -2211,6 +2225,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
#define set_respect(val) _set_param(respect, *out_respect, val)
#define set_dups(val) _set_param(dups, *out_dups, val)
#define set_proclist(val) _set_param(proclist, *out_proclist, val)
+#define set_reset(val) _set_param(reset, __kmp_affin_reset, val)
#define set_gran(val, levels) \
{ \
@@ -2280,6 +2295,12 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
} else if (__kmp_match_str("norespect", buf, CCAST(const char **, &next))) {
set_respect(FALSE);
buf = next;
+ } else if (__kmp_match_str("reset", buf, CCAST(const char **, &next))) {
+ set_reset(TRUE);
+ buf = next;
+ } else if (__kmp_match_str("noreset", buf, CCAST(const char **, &next))) {
+ set_reset(FALSE);
+ buf = next;
} else if (__kmp_match_str("duplicates", buf,
CCAST(const char **, &next)) ||
__kmp_match_str("dups", buf, CCAST(const char **, &next))) {
@@ -2420,6 +2441,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
#undef set_warnings
#undef set_respect
#undef set_granularity
+#undef set_reset
__kmp_str_free(&buffer);
@@ -2551,6 +2573,11 @@ static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name,
} else {
__kmp_str_buf_print(buffer, "%s,", "norespect");
}
+ if (__kmp_affin_reset) {
+ __kmp_str_buf_print(buffer, "%s,", "reset");
+ } else {
+ __kmp_str_buf_print(buffer, "%s,", "noreset");
+ }
__kmp_str_buf_print(buffer, "granularity=%s,",
__kmp_hw_get_keyword(__kmp_affinity_gran, false));
}
@@ -5009,7 +5036,7 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
attr.set_core_type(KMP_HW_CORE_TYPE_CORE);
} else if (__kmp_str_match("intel_atom", -1, attr_ptr + 1)) {
attr.set_core_type(KMP_HW_CORE_TYPE_ATOM);
- }
+ } else
#endif
if (__kmp_str_match("eff", 3, attr_ptr + 1)) {
const char *number = attr_ptr + 1;
diff --git a/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp b/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp
index 501830eaa7..6c1d93a891 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp
@@ -45,6 +45,9 @@ static void __kmp_init_node(kmp_depnode_t *node) {
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
#endif
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ __itt_sync_create(node, "OMP task dep node", NULL, 0);
+#endif
}
static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
diff --git a/contrib/libs/cxxsupp/openmp/kmp_taskdeps.h b/contrib/libs/cxxsupp/openmp/kmp_taskdeps.h
index 99f182bbd0..ac6174afd3 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_taskdeps.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_taskdeps.h
@@ -25,6 +25,9 @@ static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1;
KMP_DEBUG_ASSERT(n >= 0);
if (n == 0) {
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ __itt_sync_destroy(node);
+#endif
KMP_ASSERT(node->dn.nrefs == 0);
#if USE_FAST_MEMORY
__kmp_fast_free(thread, node);
@@ -125,11 +128,17 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
kmp_taskdata_t *next_taskdata;
for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) {
kmp_depnode_t *successor = p->node;
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ __itt_sync_releasing(successor);
+#endif
kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->dn.npredecessors) - 1;
// successor task can be NULL for wait_depends or because deps are still
// being processed
if (npredecessors == 0) {
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ __itt_sync_acquired(successor);
+#endif
KMP_MB();
if (successor->dn.task) {
KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled "
diff --git a/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp b/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
index e445438524..1622c6aea1 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_tasking.cpp
@@ -319,6 +319,144 @@ static void __kmp_realloc_task_deque(kmp_info_t *thread,
thread_data->td.td_deque_size = new_size;
}
+static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
+ kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
+ kmp_thread_data_t *thread_data = &l->td;
+ __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
+ thread_data->td.td_deque_last_stolen = -1;
+ KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
+ "for thread_data %p\n",
+ __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
+ thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
+ INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
+ thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
+ return l;
+}
+
+// The function finds the deque of priority tasks with the given priority, or
+// allocates a new deque and puts it into the sorted (high -> low) list of deques.
+// Deques of non-default priority tasks are shared between all threads in the team,
+// as opposed to per-thread deques of tasks with default priority.
+// The function is called under the lock task_team->tt.tt_task_pri_lock.
+static kmp_thread_data_t *
+__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
+ kmp_thread_data_t *thread_data;
+ kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
+ if (lst->priority == pri) {
+ // Found queue of tasks with given priority.
+ thread_data = &lst->td;
+ } else if (lst->priority < pri) {
+ // All current priority queues contain tasks with lower priority.
+ // Allocate new one for given priority tasks.
+ kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
+ thread_data = &list->td;
+ list->priority = pri;
+ list->next = lst;
+ task_team->tt.tt_task_pri_list = list;
+ } else { // task_team->tt.tt_task_pri_list->priority > pri
+ kmp_task_pri_t *next_queue = lst->next;
+ while (next_queue && next_queue->priority > pri) {
+ lst = next_queue;
+ next_queue = lst->next;
+ }
+ // lst->priority > pri && (next_queue == NULL || pri >= next_queue->priority)
+ if (next_queue == NULL) {
+ // No queue with pri priority, need to allocate new one.
+ kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
+ thread_data = &list->td;
+ list->priority = pri;
+ list->next = NULL;
+ lst->next = list;
+ } else if (next_queue->priority == pri) {
+ // Found queue of tasks with given priority.
+ thread_data = &next_queue->td;
+ } else { // lst->priority > pri > next->priority
+ // insert the newly allocated queue between existing queues
+ kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
+ thread_data = &list->td;
+ list->priority = pri;
+ list->next = next_queue;
+ lst->next = list;
+ }
+ }
+ return thread_data;
+}
+
+// __kmp_push_priority_task: Add a task to the team's priority task deque
+static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
+ kmp_taskdata_t *taskdata,
+ kmp_task_team_t *task_team,
+ kmp_int32 pri) {
+ kmp_thread_data_t *thread_data = NULL;
+ KA_TRACE(20,
+ ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
+ gtid, taskdata, pri));
+
+ // Find task queue specific to priority value
+ kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
+ if (UNLIKELY(lst == NULL)) {
+ __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
+ if (task_team->tt.tt_task_pri_list == NULL) {
+ // List of queues is still empty, allocate one.
+ kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
+ thread_data = &list->td;
+ list->priority = pri;
+ list->next = NULL;
+ task_team->tt.tt_task_pri_list = list;
+ } else {
+ // Other thread initialized a queue. Check if it fits and get thread_data.
+ thread_data = __kmp_get_priority_deque_data(task_team, pri);
+ }
+ __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
+ } else {
+ if (lst->priority == pri) {
+ // Found queue of tasks with given priority.
+ thread_data = &lst->td;
+ } else {
+ __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
+ thread_data = __kmp_get_priority_deque_data(task_team, pri);
+ __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
+ }
+ }
+ KMP_DEBUG_ASSERT(thread_data);
+
+ __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+ // Check if deque is full
+ if (TCR_4(thread_data->td.td_deque_ntasks) >=
+ TASK_DEQUE_SIZE(thread_data->td)) {
+ if (__kmp_enable_task_throttling &&
+ __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
+ thread->th.th_current_task)) {
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
+ "TASK_NOT_PUSHED for task %p\n",
+ gtid, taskdata));
+ return TASK_NOT_PUSHED;
+ } else {
+ // expand deque to push the task which is not allowed to execute
+ __kmp_realloc_task_deque(thread, thread_data);
+ }
+ }
+ KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
+ TASK_DEQUE_SIZE(thread_data->td));
+ // Push taskdata.
+ thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
+ // Wrap index.
+ thread_data->td.td_deque_tail =
+ (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
+ TCW_4(thread_data->td.td_deque_ntasks,
+ TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
+ KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
+ KMP_FSYNC_RELEASING(taskdata); // releasing child
+ KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
+ "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
+ gtid, taskdata, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ task_team->tt.tt_num_task_pri++; // atomic inc
+ return TASK_SUCCESSFULLY_PUSHED;
+}
+
// __kmp_push_task: Add a task to the thread's deque
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
kmp_info_t *thread = __kmp_threads[gtid];
@@ -371,6 +509,12 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);
+ if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
+ __kmp_max_task_priority > 0) {
+ int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
+ return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
+ }
+
// Find tasking deque specific to encountering thread
thread_data = &task_team->tt.tt_threads_data[tid];
@@ -567,6 +711,8 @@ static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
ompt_frame_runtime | ompt_frame_framepointer;
task->ompt_task_info.frame.enter_frame_flags =
ompt_frame_runtime | ompt_frame_framepointer;
+ task->ompt_task_info.dispatch_chunk.start = 0;
+ task->ompt_task_info.dispatch_chunk.iterations = 0;
}
// __ompt_task_start:
@@ -728,6 +874,10 @@ static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
taskdata->td_flags.task_serial == 1);
KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
+ kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
+ // Clear data to not be re-used later by mistake.
+ task->data1.destructors = NULL;
+ task->data2.priority = 0;
taskdata->td_flags.freed = 1;
// deallocate the taskdata and shared variable blocks associated with this task
@@ -1599,6 +1749,18 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_start(task, current_task, gtid);
#endif
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
+ taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
+ ompt_data_t instance = ompt_data_none;
+ instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
+ &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
+ ompt_dispatch_taskloop_chunk, instance);
+ taskdata->ompt_task_info.dispatch_chunk = {0, 0};
+ }
+#endif // OMPT_SUPPORT && OMPT_OPTIONAL
#if OMPD_SUPPORT
if (ompd_state & OMPD_ENABLE_BP)
@@ -1747,8 +1909,21 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
if (serialize_immediate)
new_taskdata->td_flags.task_serial = 1;
__kmp_invoke_task(gtid, new_task, current_task);
+ } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
+ __kmp_wpolicy_passive) {
+ kmp_info_t *this_thr = __kmp_threads[gtid];
+ kmp_team_t *team = this_thr->th.th_team;
+ kmp_int32 nthreads = this_thr->th.th_team_nproc;
+ for (int i = 0; i < nthreads; ++i) {
+ kmp_info_t *thread = team->t.t_threads[i];
+ if (thread == this_thr)
+ continue;
+ if (thread->th.th_sleep_loc != NULL) {
+ __kmp_null_resume_wrapper(thread);
+ break; // awake one thread at a time
+ }
+ }
}
-
return TASK_CURRENT_NOT_QUEUED;
}
@@ -2089,7 +2264,7 @@ kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
Flags for special info per task reduction item.
*/
typedef struct kmp_taskred_flags {
- /*! 1 - use lazy alloc/init (e.g. big objects, #tasks < #threads) */
+ /*! 1 - use lazy alloc/init (e.g. big objects, num tasks < num threads) */
unsigned lazy_priv : 1;
unsigned reserved31 : 31;
} kmp_taskred_flags_t;
@@ -2667,6 +2842,105 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
#endif
}
+static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
+ kmp_task_team_t *task_team,
+ kmp_int32 is_constrained) {
+ kmp_task_t *task = NULL;
+ kmp_taskdata_t *taskdata;
+ kmp_taskdata_t *current;
+ kmp_thread_data_t *thread_data;
+ int ntasks = task_team->tt.tt_num_task_pri;
+ if (ntasks == 0) {
+ KA_TRACE(
+ 20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));
+ return NULL;
+ }
+ do {
+ // decrement num_tasks to "reserve" one task to get for execution
+ if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
+ ntasks - 1))
+ break;
+ } while (ntasks > 0);
+ if (ntasks == 0) {
+ KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
+ __kmp_get_gtid()));
+ return NULL;
+ }
+ // We got a "ticket" to get a "reserved" priority task
+ int deque_ntasks;
+ kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
+ do {
+ KMP_ASSERT(list != NULL);
+ thread_data = &list->td;
+ __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+ deque_ntasks = thread_data->td.td_deque_ntasks;
+ if (deque_ntasks == 0) {
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",
+ __kmp_get_gtid(), thread_data));
+ list = list->next;
+ }
+ } while (deque_ntasks == 0);
+ KMP_DEBUG_ASSERT(deque_ntasks);
+ int target = thread_data->td.td_deque_head;
+ current = __kmp_threads[gtid]->th.th_current_task;
+ taskdata = thread_data->td.td_deque[target];
+ if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
+ // Bump head pointer and Wrap.
+ thread_data->td.td_deque_head =
+ (target + 1) & TASK_DEQUE_MASK(thread_data->td);
+ } else {
+ if (!task_team->tt.tt_untied_task_encountered) {
+ // The TSC does not allow stealing the victim task
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "
+ "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data, task_team, deque_ntasks, target,
+ thread_data->td.td_deque_tail));
+ task_team->tt.tt_num_task_pri++; // atomic inc, restore value
+ return NULL;
+ }
+ int i;
+ // walk through the deque trying to steal any task
+ taskdata = NULL;
+ for (i = 1; i < deque_ntasks; ++i) {
+ target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
+ taskdata = thread_data->td.td_deque[target];
+ if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
+ break; // found task to execute
+ } else {
+ taskdata = NULL;
+ }
+ }
+ if (taskdata == NULL) {
+ // No appropriate candidate found to execute
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KA_TRACE(
+ 10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
+ "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data, task_team, deque_ntasks,
+ thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+ task_team->tt.tt_num_task_pri++; // atomic inc, restore value
+ return NULL;
+ }
+ int prev = target;
+ for (i = i + 1; i < deque_ntasks; ++i) {
+ // shift remaining tasks in the deque left by 1
+ target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
+ thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
+ prev = target;
+ }
+ KMP_DEBUG_ASSERT(
+ thread_data->td.td_deque_tail ==
+ (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
+ thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped))
+ }
+ thread_data->td.td_deque_ntasks = deque_ntasks - 1;
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ task = KMP_TASKDATA_TO_TASK(taskdata);
+ return task;
+}
+
// __kmp_remove_my_task: remove a task from my own deque
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
kmp_task_team_t *task_team,
@@ -2916,10 +3190,13 @@ static inline int __kmp_execute_tasks_template(
// getting tasks from target constructs
while (1) { // Inner loop to find a task and execute it
task = NULL;
- if (use_own_tasks) { // check on own queue first
+ if (task_team->tt.tt_num_task_pri) { // get priority task first
+ task = __kmp_get_priority_task(gtid, task_team, is_constrained);
+ }
+ if (task == NULL && use_own_tasks) { // check own queue next
task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
}
- if ((task == NULL) && (nthreads > 1)) { // Steal a task
+ if ((task == NULL) && (nthreads > 1)) { // Steal a task finally
int asleep = 1;
use_own_tasks = 0;
// Try to steal from the last place I stole from successfully.
@@ -3440,6 +3717,24 @@ static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
__kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
+// __kmp_free_task_pri_list:
+// Deallocates tasking deques used for priority tasks.
+// Only occurs at library shutdown.
+static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
+ __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
+ if (task_team->tt.tt_task_pri_list != NULL) {
+ kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
+ while (list != NULL) {
+ kmp_task_pri_t *next = list->next;
+ __kmp_free_task_deque(&list->td);
+ __kmp_free(list);
+ list = next;
+ }
+ task_team->tt.tt_task_pri_list = NULL;
+ }
+ __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
+}
+
// __kmp_allocate_task_team:
// Allocates a task team associated with a specific team, taking it from
// the global task team free list if possible. Also initializes data
@@ -3471,6 +3766,7 @@ static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
// __kmp_thread_malloc because threads not around for kmp_reap_task_team.
task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
__kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
+ __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
// suppress race conditions detection on synchronization flags in debug mode
// this helps to analyze library internals eliminating false positives
@@ -3540,6 +3836,9 @@ void __kmp_reap_task_teams(void) {
if (task_team->tt.tt_threads_data != NULL) {
__kmp_free_task_threads_data(task_team);
}
+ if (task_team->tt.tt_task_pri_list != NULL) {
+ __kmp_free_task_pri_list(task_team);
+ }
__kmp_free(task_team);
}
__kmp_release_bootstrap_lock(&__kmp_task_team_lock);
@@ -3996,6 +4295,17 @@ void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
pass = pass << 1;
} while (!__kmp_give_task(thread, k, ptask, pass));
+
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
+ // awake at least one thread to execute given task
+ for (int i = 0; i < nthreads; ++i) {
+ thread = team->t.t_threads[i];
+ if (thread->th.th_sleep_loc != NULL) {
+ __kmp_null_resume_wrapper(thread);
+ break;
+ }
+ }
+ }
}
/*!
@@ -4371,6 +4681,12 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
#if OMPT_SUPPORT
__kmp_omp_taskloop_task(NULL, gtid, next_task,
codeptr_ra); // schedule new task
+#if OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_dispatch) {
+ OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
+ lower, upper, st);
+ }
+#endif // OMPT_OPTIONAL
#else
__kmp_omp_task(gtid, next_task, true); // schedule new task
#endif
@@ -4800,7 +5116,7 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
@param grainsize Schedule value if specified
-@param modifer Modifier 'strict' for sched, 1 if present, 0 otherwise
+@param modifier Modifier 'strict' for sched, 1 if present, 0 otherwise
@param task_dup Tasks duplication routine
Execute the taskloop construct.
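
__kmp_get_priority_deque_data above keeps one shared deque per distinct task priority, stored in a list sorted from highest to lowest priority, and allocates a bucket lazily the first time a priority is seen. A simplified sketch of that find-or-insert walk (invented types, no locking, purely illustrative):

    // Sorted high -> low singly-linked list of priority buckets.
    struct bucket_t {
      int priority;
      bucket_t *next;
    };

    bucket_t *find_or_insert(bucket_t *&head, int pri) {
      if (head == nullptr || head->priority < pri) {
        // New highest-priority bucket goes to the front.
        head = new bucket_t{pri, head};
        return head;
      }
      bucket_t *cur = head;
      while (cur->priority != pri) {
        if (cur->next == nullptr || cur->next->priority < pri) {
          // Splice a new bucket in so the list stays ordered.
          cur->next = new bucket_t{pri, cur->next};
          return cur->next;
        }
        cur = cur->next;
      }
      return cur; // found an existing bucket for this priority
    }
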
diff --git a/contrib/libs/cxxsupp/openmp/kmp_version.cpp b/contrib/libs/cxxsupp/openmp/kmp_version.cpp
index db2454c0f4..bb600c120d 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_version.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_version.cpp
@@ -22,7 +22,9 @@
#define stringer(x) _stringer(x)
// Detect compiler.
-#if KMP_COMPILER_ICC
+#if KMP_COMPILER_ICX
+#define KMP_COMPILER __VERSION__
+#elif KMP_COMPILER_ICC
#if __INTEL_COMPILER == 1010
#define KMP_COMPILER "Intel(R) C++ Compiler 10.1"
#elif __INTEL_COMPILER == 1100
@@ -53,8 +55,10 @@
#define KMP_COMPILER "Intel(R) C++ Compiler 19.0"
#elif __INTEL_COMPILER == 1910
#define KMP_COMPILER "Intel(R) C++ Compiler 19.1"
-#elif __INTEL_COMPILER >= 9900
-#define KMP_COMPILER "Intel(R) C++ Compiler mainline"
+#elif __INTEL_COMPILER > 1910
+#define KMP_COMPILER \
+ "Intel(R) C++ Compiler Classic " stringer(__INTEL_COMPILER) "." stringer( \
+ __INTEL_COMPILER_UPDATE)
#endif
#elif KMP_COMPILER_CLANG
#define KMP_COMPILER \
diff --git a/contrib/libs/cxxsupp/openmp/kmp_wait_release.h b/contrib/libs/cxxsupp/openmp/kmp_wait_release.h
index b32cb15de1..3fcae5687d 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_wait_release.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_wait_release.h
@@ -105,7 +105,7 @@ template <flag_type FlagType> class kmp_flag {
protected:
flag_properties t; /**< "Type" of the flag in loc */
kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */
- kmp_uint32 num_waiting_threads; /**< #threads sleeping on this thread. */
+ kmp_uint32 num_waiting_threads; /**< Num threads sleeping on this thread. */
std::atomic<bool> *sleepLoc;
public:
@@ -609,7 +609,8 @@ final_spin=FALSE)
continue;
// Don't suspend if there is a likelihood of new tasks being spawned.
- if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
+ if (task_team != NULL && TCR_4(task_team->tt.tt_found_tasks) &&
+ !__kmp_wpolicy_passive)
continue;
#if KMP_USE_MONITOR
@@ -625,10 +626,6 @@ final_spin=FALSE)
if (!Sleepable)
continue;
- if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
- __kmp_pause_status != kmp_soft_paused)
- continue;
-
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
diff --git a/contrib/libs/cxxsupp/openmp/omp-tools.h b/contrib/libs/cxxsupp/openmp/omp-tools.h
index 5092174d66..6bae305c71 100644
--- a/contrib/libs/cxxsupp/openmp/omp-tools.h
+++ b/contrib/libs/cxxsupp/openmp/omp-tools.h
@@ -266,7 +266,10 @@ typedef enum ompt_scope_endpoint_t {
typedef enum ompt_dispatch_t {
ompt_dispatch_iteration = 1,
- ompt_dispatch_section = 2
+ ompt_dispatch_section = 2,
+ ompt_dispatch_ws_loop_chunk = 3,
+ ompt_dispatch_taskloop_chunk = 4,
+ ompt_dispatch_distribute_chunk = 5
} ompt_dispatch_t;
typedef enum ompt_sync_region_t {
@@ -303,7 +306,11 @@ typedef enum ompt_work_t {
ompt_work_workshare = 5,
ompt_work_distribute = 6,
ompt_work_taskloop = 7,
- ompt_work_scope = 8
+ ompt_work_scope = 8,
+ ompt_work_loop_static = 10,
+ ompt_work_loop_dynamic = 11,
+ ompt_work_loop_guided = 12,
+ ompt_work_loop_other = 13
} ompt_work_t;
typedef enum ompt_mutex_t {
@@ -554,6 +561,11 @@ typedef struct ompt_dependence_t {
ompt_dependence_type_t dependence_type;
} ompt_dependence_t;
+typedef struct ompt_dispatch_chunk_t {
+ uint64_t start;
+ uint64_t iterations;
+} ompt_dispatch_chunk_t;
+
typedef int (*ompt_enumerate_states_t) (
int current_state,
int *next_state,
@@ -745,7 +757,7 @@ typedef struct ompt_record_parallel_end_t {
} ompt_record_parallel_end_t;
typedef void (*ompt_callback_work_t) (
- ompt_work_t wstype,
+ ompt_work_t work_type,
ompt_scope_endpoint_t endpoint,
ompt_data_t *parallel_data,
ompt_data_t *task_data,
@@ -754,7 +766,7 @@ typedef void (*ompt_callback_work_t) (
);
typedef struct ompt_record_work_t {
- ompt_work_t wstype;
+ ompt_work_t work_type;
ompt_scope_endpoint_t endpoint;
ompt_id_t parallel_id;
ompt_id_t task_id;
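
With the new ompt_dispatch_*_chunk kinds above, the dispatch callback's instance argument carries a pointer to the new ompt_dispatch_chunk_t. A hedged sketch of a tool-side callback that consumes it (the function name is invented, and registration via ompt_set_callback is omitted):

    #include <omp-tools.h>
    #include <stdio.h>

    static void on_dispatch(ompt_data_t *parallel_data, ompt_data_t *task_data,
                            ompt_dispatch_t kind, ompt_data_t instance) {
      (void)parallel_data;
      (void)task_data;
      if (kind == ompt_dispatch_ws_loop_chunk ||
          kind == ompt_dispatch_distribute_chunk ||
          kind == ompt_dispatch_taskloop_chunk) {
        // For the chunk kinds, instance.ptr points at an ompt_dispatch_chunk_t.
        ompt_dispatch_chunk_t *chunk = (ompt_dispatch_chunk_t *)instance.ptr;
        printf("chunk start=%llu iterations=%llu\n",
               (unsigned long long)chunk->start,
               (unsigned long long)chunk->iterations);
      }
    }
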
diff --git a/contrib/libs/cxxsupp/openmp/omp.h b/contrib/libs/cxxsupp/openmp/omp.h
index 2ddf4f630b..959e87359d 100644
--- a/contrib/libs/cxxsupp/openmp/omp.h
+++ b/contrib/libs/cxxsupp/openmp/omp.h
@@ -374,7 +374,6 @@
extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc;
extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc;
- /* Preview of target memory support */
extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
@@ -385,7 +384,6 @@
extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space;
- /* Preview of target memory support */
extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_host_mem_space;
extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_device_mem_space;
@@ -405,7 +403,6 @@
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
- /* Preview of target memory support */
llvm_omp_target_host_mem_alloc = 100,
llvm_omp_target_shared_mem_alloc = 101,
llvm_omp_target_device_mem_alloc = 102,
@@ -422,7 +419,6 @@
omp_const_mem_space = 2,
omp_high_bw_mem_space = 3,
omp_low_lat_mem_space = 4,
- /* Preview of target memory support */
llvm_omp_target_host_mem_space = 100,
llvm_omp_target_shared_mem_space = 101,
llvm_omp_target_device_mem_space = 102,
@@ -503,6 +499,12 @@
#pragma omp end declare variant
# endif
+ /* OpenMP 5.2 */
+ extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void);
+
+ /* LLVM Extensions */
+ extern void *llvm_omp_target_dynamic_shared_alloc();
+
# undef __KAI_KMPC_CONVENTION
# undef __KMP_IMP
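
omp_in_explicit_task() is the OpenMP 5.2 query declared above; it returns nonzero only while the calling thread is executing an explicit task. A minimal usage sketch, with the expected output noted in comments under the assumption of a conforming 5.2 runtime:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      // Not inside any explicit task: expected 0.
      printf("outside task: %d\n", omp_in_explicit_task());
    #pragma omp parallel num_threads(2)
    #pragma omp single
      {
    #pragma omp task
        { printf("inside task:  %d\n", omp_in_explicit_task()); } // expected 1
      }
      return 0;
    }
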
diff --git a/contrib/libs/cxxsupp/openmp/ompt-event-specific.h b/contrib/libs/cxxsupp/openmp/ompt-event-specific.h
index 875d6921b7..f6c7022c8f 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-event-specific.h
+++ b/contrib/libs/cxxsupp/openmp/ompt-event-specific.h
@@ -104,7 +104,7 @@
#define ompt_callback_reduction_implemented ompt_event_MAY_ALWAYS_OPTIONAL
-#define ompt_callback_dispatch_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_dispatch_implemented ompt_event_MAY_ALWAYS_OPTIONAL
#define ompt_callback_error_implemented ompt_event_MAY_ALWAYS_OPTIONAL
diff --git a/contrib/libs/cxxsupp/openmp/ompt-general.cpp b/contrib/libs/cxxsupp/openmp/ompt-general.cpp
index c1468c0c32..0bee7e77c8 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-general.cpp
+++ b/contrib/libs/cxxsupp/openmp/ompt-general.cpp
@@ -310,7 +310,8 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
OMPT_VERBOSE_INIT_PRINT("Opening %s... ", fname);
HMODULE h = LoadLibrary(fname);
if (!h) {
- OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n", GetLastError());
+ OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n",
+ (unsigned)GetLastError());
} else {
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Success. \n");
OMPT_VERBOSE_INIT_PRINT("Searching for ompt_start_tool in %s... ",
@@ -318,7 +319,7 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool");
if (!start_tool) {
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n",
- GetLastError());
+ (unsigned)GetLastError());
} else
#else
#error Activation of OMPT is not supported on this platform.
@@ -789,7 +790,7 @@ OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size,
OMPT_API_ROUTINE int ompt_get_proc_id(void) {
if (!ompt_enabled.enabled || __kmp_get_gtid() < 0)
return -1;
-#if KMP_OS_LINUX
+#if KMP_HAVE_SCHED_GETCPU
return sched_getcpu();
#elif KMP_OS_WINDOWS
PROCESSOR_NUMBER pn;
diff --git a/contrib/libs/cxxsupp/openmp/ompt-internal.h b/contrib/libs/cxxsupp/openmp/ompt-internal.h
index 6665bb5e83..a85fe3835c 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-internal.h
+++ b/contrib/libs/cxxsupp/openmp/ompt-internal.h
@@ -57,6 +57,7 @@ typedef struct {
ompt_data_t task_data;
struct kmp_taskdata *scheduling_parent;
int thread_num;
+ ompt_dispatch_chunk_t dispatch_chunk;
} ompt_task_info_t;
typedef struct {
diff --git a/contrib/libs/cxxsupp/openmp/ompt-specific.h b/contrib/libs/cxxsupp/openmp/ompt-specific.h
index 2fc7ee1c35..bd1e0d8991 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-specific.h
+++ b/contrib/libs/cxxsupp/openmp/ompt-specific.h
@@ -89,6 +89,17 @@ inline void *__ompt_load_return_address(int gtid) {
? __ompt_load_return_address(gtid) \
: __builtin_return_address(0))
+#define OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, incr) \
+ do { \
+ if (incr > 0) { \
+ chunk.start = static_cast<uint64_t>(lb); \
+ chunk.iterations = static_cast<uint64_t>(((ub) - (lb)) / (incr) + 1); \
+ } else { \
+ chunk.start = static_cast<uint64_t>(ub); \
+ chunk.iterations = static_cast<uint64_t>(((lb) - (ub)) / -(incr) + 1); \
+ } \
+ } while (0)
+
//******************************************************************************
// inline functions
//******************************************************************************
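
OMPT_GET_DISPATCH_CHUNK above normalises a loop range into a start iteration plus an iteration count, with the direction handled by the sign of the increment. A standalone restatement of that arithmetic with two worked cases (the struct and function names are local to this sketch):

    #include <cassert>
    #include <cstdint>

    struct chunk_t { uint64_t start, iterations; };

    static chunk_t make_chunk(int64_t lb, int64_t ub, int64_t incr) {
      chunk_t c;
      if (incr > 0) {
        c.start = (uint64_t)lb;
        c.iterations = (uint64_t)((ub - lb) / incr + 1);
      } else {
        c.start = (uint64_t)ub;
        c.iterations = (uint64_t)((lb - ub) / -incr + 1);
      }
      return c;
    }

    int main() {
      chunk_t a = make_chunk(0, 9, 2);  // iterations 0,2,4,6,8
      assert(a.start == 0 && a.iterations == 5);
      chunk_t b = make_chunk(9, 1, -2); // iterations 9,7,5,3,1
      assert(b.start == 1 && b.iterations == 5);
      return 0;
    }
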
diff --git a/contrib/libs/cxxsupp/openmp/z_Linux_util.cpp b/contrib/libs/cxxsupp/openmp/z_Linux_util.cpp
index 5cd6ad6a03..91edf0254a 100644
--- a/contrib/libs/cxxsupp/openmp/z_Linux_util.cpp
+++ b/contrib/libs/cxxsupp/openmp/z_Linux_util.cpp
@@ -1297,7 +1297,13 @@ static void __kmp_atfork_child(void) {
__kmp_itt_reset(); // reset ITT's global state
#endif /* USE_ITT_BUILD */
- __kmp_serial_initialize();
+ {
+ // A child process often gets terminated without any use of OpenMP. That might
+ // cause the mapped shared memory file to be left behind. Thus we postpone
+ // library registration until middle initialization in the child process.
+ __kmp_need_register_serial = FALSE;
+ __kmp_serial_initialize();
+ }
/* This is necessary to make sure no stale data is left around */
/* AC: customers complain that we use unsafe routines in the atfork
diff --git a/library/cpp/json/converter/converter.h b/library/cpp/json/converter/converter.h
index be86c31595..04f26b6946 100644
--- a/library/cpp/json/converter/converter.h
+++ b/library/cpp/json/converter/converter.h
@@ -1,3 +1,5 @@
+#pragma once
+
#include "library/cpp/json/writer/json_value.h"
#include <limits>
diff --git a/library/cpp/json/converter/ut/test_conversion.cpp b/library/cpp/json/converter/ut/test_conversion.cpp
index c2c857835a..84027dda75 100644
--- a/library/cpp/json/converter/ut/test_conversion.cpp
+++ b/library/cpp/json/converter/ut/test_conversion.cpp
@@ -30,19 +30,15 @@ namespace NJson {
static void TestEncoding(const TValues& values) {
for (const auto& [serializedValue, value] : values) {
- {
- const auto encodedValue = TConverter<T>::Encode(value);
- AssertJsonsEqual(encodedValue, serializedValue);
- }
+ const auto encodedValue = TConverter<T>::Encode(value);
+ AssertJsonsEqual(encodedValue, serializedValue);
}
}
static void TestDecoding(const TValues& values) {
for (const auto& [serializedValue, value] : values) {
- {
- const auto decodedValue = TConverter<T>::Decode(ReadJsonFastTree(serializedValue));
- UNIT_ASSERT_EQUAL(decodedValue, value);
- }
+ const auto decodedValue = TConverter<T>::Decode(ReadJsonFastTree(serializedValue));
+ UNIT_ASSERT_EQUAL(decodedValue, value);
}
}
@@ -59,11 +55,9 @@ namespace NJson {
TestDecoding(values);
for (const auto& [serializedValue, value] : values) {
- {
- const auto encodedValue = TConverter<T>::Encode(value);
- const auto decodedValue = TConverter<T>::Decode(encodedValue);
- UNIT_ASSERT_EQUAL(value, decodedValue);
- }
+ const auto encodedValue = TConverter<T>::Encode(value);
+ const auto decodedValue = TConverter<T>::Decode(encodedValue);
+ UNIT_ASSERT_EQUAL(value, decodedValue);
}
}
};