1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
|
#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
//===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// AMDHSA kernel descriptor definitions. For more information, visit
/// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
#define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
#include <cstddef>
#include <cstdint>
// Gets offset of specified member in specified type.
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
#endif // offsetof
// Creates enumeration entries used for packing bits into integers. Enumeration
// entries include bit shift amount, bit width, and bit mask.
#ifndef AMDHSA_BITS_ENUM_ENTRY
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH) \
NAME ## _SHIFT = (SHIFT), \
NAME ## _WIDTH = (WIDTH), \
NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
#endif // AMDHSA_BITS_ENUM_ENTRY
// Gets bits for specified bit mask from specified source.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) ((SRC & MSK) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET
// Sets bits for specified bit mask in specified destination.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL) \
DST &= ~MSK; \
DST |= ((VAL << MSK ## _SHIFT) & MSK)
#endif // AMDHSA_BITS_SET
namespace llvm {
namespace amdhsa {
// Floating point rounding modes. Must match hardware definition.
enum : uint8_t {
FLOAT_ROUND_MODE_NEAR_EVEN = 0,
FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
FLOAT_ROUND_MODE_ZERO = 3,
};
// Floating point denorm modes. Must match hardware definition.
enum : uint8_t {
FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
FLOAT_DENORM_MODE_FLUSH_DST = 1,
FLOAT_DENORM_MODE_FLUSH_SRC = 2,
FLOAT_DENORM_MODE_FLUSH_NONE = 3,
};
// System VGPR workitem IDs. Must match hardware definition.
enum : uint8_t {
SYSTEM_VGPR_WORKITEM_ID_X = 0,
SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
};
// Compute program resource register 1. Must match hardware definition.
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
COMPUTE_PGM_RSRC1(PRIV, 20, 1),
COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1),
COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
};
#undef COMPUTE_PGM_RSRC1
// Compute program resource register 2. Must match hardware definition.
#define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
};
#undef COMPUTE_PGM_RSRC2
// Compute program resource register 3. Must match hardware definition.
#define COMPUTE_PGM_RSRC3(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC3(SHARED_VGPR_COUNT, 0, 4), // GFX10+
COMPUTE_PGM_RSRC3(RESERVED0, 4, 28),
};
#undef COMPUTE_PGM_RSRC3
// Kernel code properties. Must be kept backwards compatible.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
};
#undef KERNEL_CODE_PROPERTY
// Kernel descriptor. Must be kept backwards compatible.
struct kernel_descriptor_t {
uint32_t group_segment_fixed_size;
uint32_t private_segment_fixed_size;
uint8_t reserved0[8];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
uint32_t compute_pgm_rsrc3; // GFX10+
uint32_t compute_pgm_rsrc1;
uint32_t compute_pgm_rsrc2;
uint16_t kernel_code_properties;
uint8_t reserved2[6];
};
enum : uint32_t {
GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
RESERVED0_OFFSET = 8,
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
RESERVED1_OFFSET = 24,
COMPUTE_PGM_RSRC3_OFFSET = 44,
COMPUTE_PGM_RSRC1_OFFSET = 48,
COMPUTE_PGM_RSRC2_OFFSET = 52,
KERNEL_CODE_PROPERTIES_OFFSET = 56,
RESERVED2_OFFSET = 58,
};
static_assert(
sizeof(kernel_descriptor_t) == 64,
"invalid size for kernel_descriptor_t");
static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
GROUP_SEGMENT_FIXED_SIZE_OFFSET,
"invalid offset for group_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
"invalid offset for private_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
"invalid offset for reserved0");
static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
"invalid offset for kernel_code_entry_byte_offset");
static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
"invalid offset for reserved1");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
COMPUTE_PGM_RSRC3_OFFSET,
"invalid offset for compute_pgm_rsrc3");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
COMPUTE_PGM_RSRC1_OFFSET,
"invalid offset for compute_pgm_rsrc1");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
COMPUTE_PGM_RSRC2_OFFSET,
"invalid offset for compute_pgm_rsrc2");
static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
KERNEL_CODE_PROPERTIES_OFFSET,
"invalid offset for kernel_code_properties");
static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET,
"invalid offset for reserved2");
} // end namespace amdhsa
} // end namespace llvm
#endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
|