contrib/libs/llvm16/include/llvm/Frontend/OpenMP/OMPGridValues.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

#pragma once

#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

//====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Provides definitions for Target specific Grid Values
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
#define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H

namespace llvm {

namespace omp {

/// \brief Defines various target-specific GPU grid values that must be
///        consistent between host RTL (plugin), device RTL, and clang.
///        We can change grid values for a "fat" binary so that different
///        passes get the correct values when generating code for a
///        multi-target binary. Both amdgcn and nvptx values are stored in
///        this file. In the future, should there be differences between GPUs
///        of the same architecture, then simply make a different array and
///        use the new array name.
///
/// Example usage in clang:
///   const unsigned slot_size =
///   ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #ifdef __AMDGPU__
///     #define GRIDVAL AMDGPUGridValues
///   #else
///     #define GRIDVAL NVPTXGridValues
///   #endif
///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
///   llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #define GRIDVAL AMDGPUGridValues
///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
///   llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
///    #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///    #define GRIDVAL NVPTXGridValues
///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
///    llvm::omp::GRIDVAL().GV_Warp_Size
///

struct GV {
  /// The size reserved for data in a shared memory slot.
  unsigned GV_Slot_Size;
  /// The default value of maximum number of threads in a worker warp.
  unsigned GV_Warp_Size;

  constexpr unsigned warpSlotSize() const {
    return GV_Warp_Size * GV_Slot_Size;
  }

  /// the maximum number of teams.
  unsigned GV_Max_Teams;
  // The default number of teams in the absence of any other information.
  unsigned GV_Default_Num_Teams;

  // An alternative to the heavy data sharing infrastructure that uses global
  // memory is one that uses device __shared__ memory.  The amount of such space
  // (in bytes) reserved by the OpenMP runtime is noted here.
  unsigned GV_SimpleBufferSize;
  // The absolute maximum team size for a working group
  unsigned GV_Max_WG_Size;
  // The default maximum team size for a working group
  unsigned GV_Default_WG_Size;

  constexpr unsigned maxWarpNumber() const {
    return GV_Max_WG_Size / GV_Warp_Size;
  }
};

/// For AMDGPU GPUs
static constexpr GV AMDGPUGridValues64 = {
    256,  // GV_Slot_Size
    64,   // GV_Warp_Size
    (1 << 16), // GV_Max_Teams
    440,  // GV_Default_Num_Teams
    896,  // GV_SimpleBufferSize
    1024, // GV_Max_WG_Size,
    256,  // GV_Default_WG_Size
};

static constexpr GV AMDGPUGridValues32 = {
    256,  // GV_Slot_Size
    32,   // GV_Warp_Size
    (1 << 16), // GV_Max_Teams
    440,  // GV_Default_Num_Teams
    896,  // GV_SimpleBufferSize
    1024, // GV_Max_WG_Size,
    256,  // GV_Default_WG_Size
};

template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
  static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
  return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
}

/// For Nvidia GPUs
static constexpr GV NVPTXGridValues = {
    256,  // GV_Slot_Size
    32,   // GV_Warp_Size
    (1 << 16), // GV_Max_Teams
    3200, // GV_Default_Num_Teams
    896,  // GV_SimpleBufferSize
    1024, // GV_Max_WG_Size
    128,  // GV_Default_WG_Size
};

} // namespace omp
} // namespace llvm

#endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif