diff options
| author | thegeorg <[email protected]> | 2022-10-20 12:16:22 +0300 |
|---|---|---|
| committer | thegeorg <[email protected]> | 2022-10-20 12:16:22 +0300 |
| commit | da5ee816c1598acf602c1c42845b544878400d34 (patch) | |
| tree | 47c0acdeae9bbd5ceb1019b6c8e94ada327d7776 /contrib/libs/cxxsupp/openmp/kmp_affinity.cpp | |
| parent | d37715ef865ba1c48ca505f8b96151ae6d417657 (diff) | |
Update contrib/libs/cxxsupp/openmp to 15.0.2
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/kmp_affinity.cpp')
| -rw-r--r-- | contrib/libs/cxxsupp/openmp/kmp_affinity.cpp | 160 |
1 file changed, 74 insertions(+), 86 deletions(-)
diff --git a/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp b/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp index 414a27fb057..b9a8d49d8da 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp +++ b/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp @@ -138,6 +138,18 @@ const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) { return "unknown"; } +#if KMP_AFFINITY_SUPPORTED +// If affinity is supported, check the affinity +// verbose and warning flags before printing warning +#define KMP_AFF_WARNING(...) \ + if (__kmp_affinity_verbose || \ + (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { \ + KMP_WARNING(__VA_ARGS__); \ + } +#else +#define KMP_AFF_WARNING KMP_WARNING +#endif + //////////////////////////////////////////////////////////////////////////////// // kmp_hw_thread_t methods int kmp_hw_thread_t::compare_ids(const void *a, const void *b) { @@ -818,16 +830,16 @@ void kmp_topology_t::canonicalize() { // First try core, then thread, then package kmp_hw_t gran_types[3] = {KMP_HW_CORE, KMP_HW_THREAD, KMP_HW_SOCKET}; for (auto g : gran_types) { - if (__kmp_topology->get_equivalent_type(g) != KMP_HW_UNKNOWN) { + if (get_equivalent_type(g) != KMP_HW_UNKNOWN) { gran_type = g; break; } } KMP_ASSERT(gran_type != KMP_HW_UNKNOWN); // Warn user what granularity setting will be used instead - KMP_WARNING(AffGranularityBad, "KMP_AFFINITY", - __kmp_hw_get_catalog_string(__kmp_affinity_gran), - __kmp_hw_get_catalog_string(gran_type)); + KMP_AFF_WARNING(AffGranularityBad, "KMP_AFFINITY", + __kmp_hw_get_catalog_string(__kmp_affinity_gran), + __kmp_hw_get_catalog_string(gran_type)); __kmp_affinity_gran = gran_type; } #if KMP_GROUP_AFFINITY @@ -839,12 +851,12 @@ void kmp_topology_t::canonicalize() { // processor groups that cover a socket, then the runtime must // restrict the granularity down to the processor group level. 
if (__kmp_num_proc_groups > 1) { - int gran_depth = __kmp_topology->get_level(gran_type); - int proc_group_depth = __kmp_topology->get_level(KMP_HW_PROC_GROUP); + int gran_depth = get_level(gran_type); + int proc_group_depth = get_level(KMP_HW_PROC_GROUP); if (gran_depth >= 0 && proc_group_depth >= 0 && gran_depth < proc_group_depth) { - KMP_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY", - __kmp_hw_get_catalog_string(__kmp_affinity_gran)); + KMP_AFF_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY", + __kmp_hw_get_catalog_string(__kmp_affinity_gran)); __kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP; } } @@ -966,16 +978,16 @@ bool kmp_topology_t::filter_hw_subset() { if (equivalent_type != KMP_HW_UNKNOWN) { __kmp_hw_subset->at(i).type = equivalent_type; } else { - KMP_WARNING(AffHWSubsetNotExistGeneric, - __kmp_hw_get_catalog_string(type)); + KMP_AFF_WARNING(AffHWSubsetNotExistGeneric, + __kmp_hw_get_catalog_string(type)); return false; } // Check to see if current layer has already been // specified either directly or through an equivalent type if (specified[equivalent_type] != KMP_HW_UNKNOWN) { - KMP_WARNING(AffHWSubsetEqvLayers, __kmp_hw_get_catalog_string(type), - __kmp_hw_get_catalog_string(specified[equivalent_type])); + KMP_AFF_WARNING(AffHWSubsetEqvLayers, __kmp_hw_get_catalog_string(type), + __kmp_hw_get_catalog_string(specified[equivalent_type])); return false; } specified[equivalent_type] = type; @@ -985,8 +997,8 @@ bool kmp_topology_t::filter_hw_subset() { if (max_count < 0 || (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) { bool plural = (num > 1); - KMP_WARNING(AffHWSubsetManyGeneric, - __kmp_hw_get_catalog_string(type, plural)); + KMP_AFF_WARNING(AffHWSubsetManyGeneric, + __kmp_hw_get_catalog_string(type, plural)); return false; } @@ -1008,21 +1020,21 @@ bool kmp_topology_t::filter_hw_subset() { if ((using_core_effs || using_core_types) && !__kmp_is_hybrid_cpu()) { if (item.num_attrs == 1) { if (using_core_effs) { - 
KMP_WARNING(AffHWSubsetIgnoringAttr, "efficiency"); + KMP_AFF_WARNING(AffHWSubsetIgnoringAttr, "efficiency"); } else { - KMP_WARNING(AffHWSubsetIgnoringAttr, "core_type"); + KMP_AFF_WARNING(AffHWSubsetIgnoringAttr, "core_type"); } using_core_effs = false; using_core_types = false; } else { - KMP_WARNING(AffHWSubsetAttrsNonHybrid); + KMP_AFF_WARNING(AffHWSubsetAttrsNonHybrid); return false; } } // Check if using both core types and core efficiencies together if (using_core_types && using_core_effs) { - KMP_WARNING(AffHWSubsetIncompat, "core_type", "efficiency"); + KMP_AFF_WARNING(AffHWSubsetIncompat, "core_type", "efficiency"); return false; } @@ -1058,7 +1070,7 @@ bool kmp_topology_t::filter_hw_subset() { (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) { kmp_str_buf_t buf; __kmp_hw_get_catalog_core_string(item.attr[j], &buf, num > 0); - KMP_WARNING(AffHWSubsetManyGeneric, buf.str); + KMP_AFF_WARNING(AffHWSubsetManyGeneric, buf.str); __kmp_str_buf_free(&buf); return false; } @@ -1080,8 +1092,8 @@ bool kmp_topology_t::filter_hw_subset() { } kmp_str_buf_t buf; __kmp_hw_get_catalog_core_string(other_attr, &buf, item.num[j] > 0); - KMP_WARNING(AffHWSubsetIncompat, - __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str); + KMP_AFF_WARNING(AffHWSubsetIncompat, + __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str); __kmp_str_buf_free(&buf); return false; } @@ -1093,7 +1105,7 @@ bool kmp_topology_t::filter_hw_subset() { kmp_str_buf_t buf; __kmp_hw_get_catalog_core_string(item.attr[j], &buf, item.num[j] > 0); - KMP_WARNING(AffHWSubsetAttrRepeat, buf.str); + KMP_AFF_WARNING(AffHWSubsetAttrRepeat, buf.str); __kmp_str_buf_free(&buf); return false; } @@ -1201,7 +1213,7 @@ bool kmp_topology_t::filter_hw_subset() { // One last check that we shouldn't allow filtering entire machine if (num_filtered == num_hw_threads) { - KMP_WARNING(AffHWSubsetAllFiltered); + KMP_AFF_WARNING(AffHWSubsetAllFiltered); __kmp_free(filtered); return false; } @@ -1536,6 +1548,8 @@ int 
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) { // internal topology object and set the layer ids for it. Each routine // returns a boolean on whether it was successful at doing so. kmp_affin_mask_t *__kmp_affin_fullMask = NULL; +// Original mask is a subset of full mask in multiple processor groups topology +kmp_affin_mask_t *__kmp_affin_origMask = NULL; #if KMP_USE_HWLOC static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) { @@ -1765,7 +1779,7 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) { hw_thread_index = 0; pu = NULL; - while (pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu)) { + while ((pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu))) { int index = depth - 1; bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask); kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index); @@ -3353,10 +3367,7 @@ static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex, KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels); } if (__kmp_affinity_gran_levels >= (int)depth) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffThreadsMayMigrate); - } + KMP_AFF_WARNING(AffThreadsMayMigrate); } // Run through the table, forming the masks for all threads on each core. @@ -3443,11 +3454,7 @@ static int nextNewMask; { \ if (((_osId) > _maxOsId) || \ (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ - if (__kmp_affinity_verbose || \ - (__kmp_affinity_warnings && \ - (__kmp_affinity_type != affinity_none))) { \ - KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ - } \ + KMP_AFF_WARNING(AffIgnoreInvalidProcID, _osId); \ } else { \ ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ } \ @@ -3498,11 +3505,7 @@ static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, // Copy the mask for that osId to the sum (union) mask. 
if ((num > maxOsId) || (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, num); KMP_CPU_ZERO(sumMask); } else { KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num)); @@ -3534,11 +3537,7 @@ static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, // Add the mask for that osId to the sum mask. if ((num > maxOsId) || (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, num); } else { KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num)); setSize++; @@ -3695,11 +3694,7 @@ static void __kmp_process_subplace_list(const char **scan, if (**scan == '}' || **scan == ',') { if ((start > maxOsId) || (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, start); } else { KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); (*setSize)++; @@ -3728,11 +3723,7 @@ static void __kmp_process_subplace_list(const char **scan, for (i = 0; i < count; i++) { if ((start > maxOsId) || (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, start); break; // don't proliferate warnings for large count } else { KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); @@ -3779,11 +3770,7 @@ static void __kmp_process_subplace_list(const char **scan, for (i = 0; i < count; i++) { 
if ((start > maxOsId) || (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, start); break; // don't proliferate warnings for large count } else { KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); @@ -3825,10 +3812,7 @@ static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, KMP_ASSERT(num >= 0); if ((num > maxOsId) || (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, num); } else { KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num)); (*setSize)++; @@ -3945,11 +3929,8 @@ void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) || (!KMP_CPU_ISSET(j + stride, KMP_CPU_INDEX(osId2Mask, j + stride)))) { - if ((__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) && - i < count - 1) { - KMP_WARNING(AffIgnoreInvalidProcID, j + stride); + if (i < count - 1) { + KMP_AFF_WARNING(AffIgnoreInvalidProcID, j + stride); } continue; } @@ -4072,8 +4053,13 @@ static void __kmp_aux_affinity_initialize(void) { if (__kmp_affin_fullMask == NULL) { KMP_CPU_ALLOC(__kmp_affin_fullMask); } + if (__kmp_affin_origMask == NULL) { + KMP_CPU_ALLOC(__kmp_affin_origMask); + } if (KMP_AFFINITY_CAPABLE()) { __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE); + // Make a copy before possible expanding to the entire machine mask + __kmp_affin_origMask->copy(__kmp_affin_fullMask); if (__kmp_affinity_respect_mask) { // Count the number of available processors. 
unsigned i; @@ -4085,11 +4071,7 @@ static void __kmp_aux_affinity_initialize(void) { __kmp_avail_proc++; } if (__kmp_avail_proc > __kmp_xproc) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(ErrorInitializeAffinity); - } + KMP_AFF_WARNING(ErrorInitializeAffinity); __kmp_affinity_type = affinity_none; KMP_AFFINITY_DISABLE(); return; @@ -4111,6 +4093,10 @@ static void __kmp_aux_affinity_initialize(void) { __kmp_avail_proc = __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask); #if KMP_OS_WINDOWS + if (__kmp_num_proc_groups <= 1) { + // Copy expanded full mask if topology has single processor group + __kmp_affin_origMask->copy(__kmp_affin_fullMask); + } // Set the process affinity mask since threads' affinity // masks must be subset of process mask in Windows* OS __kmp_affin_fullMask->set_process_affinity(true); @@ -4254,10 +4240,8 @@ static void __kmp_aux_affinity_initialize(void) { // Early exit if topology could not be created if (!__kmp_topology) { - if (KMP_AFFINITY_CAPABLE() && - (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) { - KMP_WARNING(ErrorInitializeAffinity); + if (KMP_AFFINITY_CAPABLE()) { + KMP_AFF_WARNING(ErrorInitializeAffinity); } if (nPackages > 0 && nCoresPerPkg > 0 && __kmp_nThreadsPerCore > 0 && __kmp_ncores > 0) { @@ -4283,6 +4267,13 @@ static void __kmp_aux_affinity_initialize(void) { if (__kmp_affinity_verbose) __kmp_topology->print("KMP_AFFINITY"); bool filtered = __kmp_topology->filter_hw_subset(); + if (filtered) { +#if KMP_OS_WINDOWS + // Copy filtered full mask if topology has single processor group + if (__kmp_num_proc_groups <= 1) +#endif + __kmp_affin_origMask->copy(__kmp_affin_fullMask); + } if (filtered && __kmp_affinity_verbose) __kmp_topology->print("KMP_HW_SUBSET"); machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); @@ -4321,10 +4312,7 @@ static void __kmp_aux_affinity_initialize(void) { 
__kmp_affinity_proclist, osId2Mask, maxIndex); } if (__kmp_affinity_num_masks == 0) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffNoValidProcID); - } + KMP_AFF_WARNING(AffNoValidProcID); __kmp_affinity_type = affinity_none; __kmp_create_affinity_none_places(); return; @@ -4374,9 +4362,7 @@ static void __kmp_aux_affinity_initialize(void) { case affinity_balanced: if (depth <= 1) { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); - } + KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); __kmp_affinity_type = affinity_none; __kmp_create_affinity_none_places(); return; @@ -4393,9 +4379,7 @@ static void __kmp_aux_affinity_initialize(void) { int nproc = ncores * maxprocpercore; if ((nproc < 2) || (nproc < __kmp_avail_proc)) { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); - } + KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); __kmp_affinity_type = affinity_none; return; } @@ -4506,6 +4490,10 @@ void __kmp_affinity_uninitialize(void) { KMP_CPU_FREE(__kmp_affin_fullMask); __kmp_affin_fullMask = NULL; } + if (__kmp_affin_origMask != NULL) { + KMP_CPU_FREE(__kmp_affin_origMask); + __kmp_affin_origMask = NULL; + } __kmp_affinity_num_masks = 0; __kmp_affinity_type = affinity_default; __kmp_affinity_num_places = 0; |
