1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
|
//******************************************************************************
// include files
//******************************************************************************
#include "kmp.h"
#include "ompt-internal.h"
#include "ompt-specific.h"
//******************************************************************************
// macros
//******************************************************************************

// map a global thread id (gtid) to an OMPT thread id: valid (>= 0) gtids are
// shifted up by one so that 0 can serve as the "no/unknown thread" value.
// fix: the original cast bound only to the comparison, i.e.
// ((ompt_thread_id_t)(id >= 0)) ? id + 1 : 0 — it produced the right values
// only by accident. parenthesize so the cast covers the whole expression and
// the macro argument is protected against operator-precedence surprises.
#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)((id) >= 0 ? (id) + 1 : 0))

// head of a team's lightweight (serialized) task team list.
// fix: dropped the stray trailing semicolon so the macro can be used safely
// in any expression context (callers already supply their own ';').
#define LWT_FROM_TEAM(team) ((team)->t.ompt_serialized_team_info)

#define OMPT_THREAD_ID_BITS 16

// 2013 08 24 - John Mellor-Crummey
// ideally, a thread should assign its own ids based on thread private data.
// however, the way the intel runtime reinitializes thread data structures
// when it creates teams makes it difficult to maintain persistent thread
// data. using a shared variable instead is simple. I leave it to intel to
// sort out how to implement a higher performance version in their runtime.
// when using fetch_and_add to generate the IDs, there isn't any reason to waste
// bits for thread id.
#if 0
#define NEXT_ID(id_ptr,tid) \
  ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid))
#else
#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
#endif
//******************************************************************************
// private operations
//******************************************************************************
//----------------------------------------------------------
// traverse the team and task hierarchy
// note: __ompt_get_teaminfo and __ompt_get_taskinfo
// traverse the hierarchy similarly and need to be
// kept consistent
//----------------------------------------------------------
ompt_team_info_t *
__ompt_get_teaminfo(int depth, int *size)
{
kmp_info_t *thr = ompt_get_thread();
if (thr) {
kmp_team *team = thr->th.th_team;
if (team == NULL) return NULL;
ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
while(depth > 0) {
// next lightweight team (if any)
if (lwt) lwt = lwt->parent;
// next heavyweight team (if any) after
// lightweight teams are exhausted
if (!lwt && team) team=team->t.t_parent;
depth--;
}
if (lwt) {
// lightweight teams have one task
if (size) *size = 1;
// return team info for lightweight team
return &lwt->ompt_team_info;
} else if (team) {
// extract size from heavyweight team
if (size) *size = team->t.t_nproc;
// return team info for heavyweight team
return &team->t.ompt_team_info;
}
}
return NULL;
}
ompt_task_info_t *
__ompt_get_taskinfo(int depth)
{
  // Return the ompt_task_info_t for the task 'depth' levels up from the
  // current thread's innermost task, or NULL if there is no thread or no
  // task at that depth.  For each task, the lightweight (serialized) team
  // list of its team is exhausted before stepping to the parent task —
  // mirroring the traversal in __ompt_get_teaminfo; the two functions
  // must stay consistent.
  ompt_task_info_t *info = NULL;
  kmp_info_t *thr = ompt_get_thread();
  if (thr) {
    kmp_taskdata_t *taskdata = thr->th.th_current_task;
    ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
    while (depth > 0) {
      // next lightweight team (if any)
      if (lwt) lwt = lwt->parent;
      // next heavyweight team (if any) after
      // lightweight teams are exhausted
      if (!lwt && taskdata) {
        taskdata = taskdata->td_parent;
        if (taskdata) {
          // restart the lightweight-team walk at the parent task's team
          lwt = LWT_FROM_TEAM(taskdata->td_team);
        }
      }
      depth--;
    }
    // prefer the lightweight team's task info when one is still in scope
    if (lwt) {
      info = &lwt->ompt_task_info;
    } else if (taskdata) {
      info = &taskdata->ompt_task_info;
    }
  }
  return info;
}
//******************************************************************************
// interface operations
//******************************************************************************
//----------------------------------------------------------
// thread support
//----------------------------------------------------------
ompt_parallel_id_t
__ompt_thread_id_new()
{
static uint64_t ompt_thread_id = 1;
return NEXT_ID(&ompt_thread_id, 0);
}
// Dispatch the thread-begin event to the tool.
// robustness: guard against a NULL callback pointer so that invoking this
// for an event no tool registered cannot call through a null function
// pointer (undefined behavior); previously the callback was invoked
// unconditionally.
void
__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid)
{
  if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
    ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
      thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
  }
}
// Dispatch the thread-end event to the tool.
// robustness: guard against a NULL callback pointer so that invoking this
// for an event no tool registered cannot call through a null function
// pointer (undefined behavior); previously the callback was invoked
// unconditionally.
void
__ompt_thread_end(ompt_thread_type_t thread_type, int gtid)
{
  if (ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
    ompt_callbacks.ompt_callback(ompt_event_thread_end)(
      thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
  }
}
// Return the OMPT thread id of the calling thread.
ompt_thread_id_t
__ompt_get_thread_id_internal()
{
  // FIXME
  // until we have a better way of assigning ids, use __kmp_get_gtid
  // since the return value might be negative, we need to test that before
  // assigning it to an ompt_thread_id_t, which is unsigned.
  int id = __kmp_get_gtid();
  // a negative gtid here means the thread is unknown to the runtime;
  // the assert documents that this path expects a registered thread
  assert(id >= 0);
  // GTID_TO_OMPT_THREAD_ID shifts valid ids up by one so 0 means "none"
  return GTID_TO_OMPT_THREAD_ID(id);
}
//----------------------------------------------------------
// state support
//----------------------------------------------------------
// Record the synchronization object the calling thread is waiting on, so
// that __ompt_get_state_internal can report it as the thread's wait id.
void
__ompt_thread_assign_wait_id(void *variable)
{
  kmp_info_t *thread = ompt_get_thread_gtid(__kmp_gtid_get_specific());
  thread->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable;
}
// Report the calling thread's OMPT state.  When 'ompt_wait_id' is
// non-NULL it also receives the thread's current wait id.  Returns
// ompt_state_undefined when no thread-specific state is available.
ompt_state_t
__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id)
{
  kmp_info_t *thread = ompt_get_thread();
  if (!thread) {
    return ompt_state_undefined;
  }
  if (ompt_wait_id) {
    *ompt_wait_id = thread->th.ompt_thread_info.wait_id;
  }
  return thread->th.ompt_thread_info.state;
}
//----------------------------------------------------------
// idle frame support
//----------------------------------------------------------
// Return the calling thread's idle frame pointer, or NULL when no
// thread-specific state is available.
void *
__ompt_get_idle_frame_internal(void)
{
  kmp_info_t *thread = ompt_get_thread();
  if (!thread) {
    return NULL;
  }
  return thread->th.ompt_thread_info.idle_frame;
}
//----------------------------------------------------------
// parallel region support
//----------------------------------------------------------
// Generate a fresh parallel region id for a valid gtid.  0 is reserved
// as the "no id" value and is returned for invalid (negative) gtids.
ompt_parallel_id_t
__ompt_parallel_id_new(int gtid)
{
  // shared counter; NEXT_ID increments it atomically via fetch-and-add
  static uint64_t ompt_parallel_id = 1;
  if (gtid < 0) {
    return 0;
  }
  return NEXT_ID(&ompt_parallel_id, gtid);
}
// Return the microtask (outlined parallel function) of the team at the
// given nesting depth, or NULL when there is no such team.
void *
__ompt_get_parallel_function_internal(int depth)
{
  ompt_team_info_t *team_info = __ompt_get_teaminfo(depth, NULL);
  if (team_info == NULL) {
    return NULL;
  }
  return team_info->microtask;
}
// Return the parallel region id of the team at the given nesting depth,
// or 0 (the "no id" value) when there is no such team.
ompt_parallel_id_t
__ompt_get_parallel_id_internal(int depth)
{
  ompt_team_info_t *team_info = __ompt_get_teaminfo(depth, NULL);
  if (team_info == NULL) {
    return 0;
  }
  return team_info->parallel_id;
}
// Return the size of the team at the given nesting depth, or -1 when
// there is no team at that depth.  The error value is only overwritten
// by __ompt_get_teaminfo when a team is actually found.
int
__ompt_get_parallel_team_size_internal(int depth)
{
  int team_size = -1;
  (void) __ompt_get_teaminfo(depth, &team_size);
  return team_size;
}
//----------------------------------------------------------
// lightweight task team support
//----------------------------------------------------------
// Initialize a lightweight task team record for a serialized parallel
// region.  'thr' and 'gtid' are currently unused but retained for
// interface stability with callers.
void
__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
                        int gtid, void *microtask,
                        ompt_parallel_id_t ompt_pid)
{
  // task-side state: no task id yet, no runtime frames, no function
  lwt->ompt_task_info.task_id = 0;
  lwt->ompt_task_info.function = NULL;
  lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
  lwt->ompt_task_info.frame.reenter_runtime_frame = NULL;
  // team-side state: record region id and outlined function
  lwt->ompt_team_info.parallel_id = ompt_pid;
  lwt->ompt_team_info.microtask = microtask;
  // not yet linked into any team's lightweight-team list
  lwt->parent = NULL;
}
// Push 'lwt' onto the front of the thread's team's lightweight task team
// list; the previous list head becomes its parent.
void
__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr)
{
  kmp_team_t *team = thr->th.th_team;
  lwt->parent = team->t.ompt_serialized_team_info;
  team->t.ompt_serialized_team_info = lwt;
}
// Pop and return the head of the thread's team's lightweight task team
// list, or NULL when the list is empty (the list is left unchanged then).
ompt_lw_taskteam_t *
__ompt_lw_taskteam_unlink(kmp_info_t *thr)
{
  kmp_team_t *team = thr->th.th_team;
  ompt_lw_taskteam_t *head = team->t.ompt_serialized_team_info;
  if (head != NULL) {
    team->t.ompt_serialized_team_info = head->parent;
  }
  return head;
}
//----------------------------------------------------------
// task support
//----------------------------------------------------------
// Generate a fresh OMPT task id from a shared atomic counter.
// 'gtid' is only used when NEXT_ID is configured (via the '#if 0' branch
// above) to embed the thread id in the low bits; with the fetch-and-add
// scheme it is ignored.
ompt_task_id_t
__ompt_task_id_new(int gtid)
{
  static uint64_t ompt_task_id = 1;
  return NEXT_ID(&ompt_task_id, gtid);
}
ompt_task_id_t
__ompt_get_task_id_internal(int depth)
{
ompt_task_info_t *info = __ompt_get_taskinfo(depth);
ompt_task_id_t task_id = info ? info->task_id : 0;
return task_id;
}
// Return the function pointer of the task at the given nesting depth,
// or NULL when there is no such task.
void *
__ompt_get_task_function_internal(int depth)
{
  ompt_task_info_t *task_info = __ompt_get_taskinfo(depth);
  if (task_info == NULL) {
    return NULL;
  }
  return task_info->function;
}
// Return the frame information of the task at the given nesting depth,
// or NULL when there is no such task.
// fix: the original initializer redundantly assigned 'frame' inside its
// own initializer ('ompt_frame_t *frame = info ? frame = &info->frame :
// NULL;') — the inner assignment was dead; simplified to a plain
// conditional expression.
ompt_frame_t *
__ompt_get_task_frame_internal(int depth)
{
  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
  return info ? &info->frame : NULL;
}
//----------------------------------------------------------
// team support
//----------------------------------------------------------
// Record the OMPT parallel region id on a (heavyweight) team.
void
__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid)
{
  team->t.ompt_team_info.parallel_id = ompt_pid;
}
|