aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/cxxsupp/openmp/kmp_barrier.h
blob: ac28a13217e9b4e795f7af20294128446a7294e1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/*
 * kmp_barrier.h
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_BARRIER_H
#define KMP_BARRIER_H

#include "kmp.h"
#include "kmp_i18n.h"

#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
#define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE_POSIX_MEMALIGN
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  void *ptr;
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) {
    if (ptr)
      free(ptr);
    return nullptr;
  }
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif

// Use four cache lines: MLC tends to prefetch the next or previous cache line
// creating a possible fake conflict between cores, so this is the only way to
// guarantee that no such prefetch can happen.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0

class distributedBarrier {
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };

  void init(size_t nthr);
  void resize(size_t nthr);
  void computeGo(size_t n);
  void computeVarsForN(size_t n);

public:
  enum {
    MAX_ITERS = 3,
    MAX_GOS = 8,
    IDEAL_GOS = 4,
    IDEAL_CONTENTION = 16,
  };

  flags_s *flags[MAX_ITERS];
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;

  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs;

  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of constructor to create aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    if (!d) {
      KMP_FATAL(MemoryAllocFailed);
    }
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }

  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release();
  void go_reset();
};

#endif // KMP_BARRIER_H