1 #ifndef _BSD_SOURCE
2 #define _BSD_SOURCE
3 #endif
4 #ifndef _DEFAULT_SOURCE
5 #define _DEFAULT_SOURCE
6 #endif
7 #include <stdio.h>
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11 #include <inttypes.h>
12 #include <omp.h>
13 #include <omp-tools.h>
14 #include "ompt-signal.h"
15 
16 // Used to detect architecture
17 #include "../../src/kmp_platform.h"
18 
19 #ifndef _TOOL_PREFIX
20 #define _TOOL_PREFIX ""
21 // If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
22 #define _OMPT_TESTS
23 #endif
24 
// Printable names for ompt_thread_t values; the array index is the numeric
// value of the enumerator. Slot 0 only holds a placeholder name.
// ompt_thread_unknown (4) has its own slot so that indexing the table with
// that thread type does not read past the end of the array.
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED", // 0
    "ompt_thread_initial", // 1
    "ompt_thread_worker", // 2
    "ompt_thread_other", // 3
    "ompt_thread_unknown" // 4
};
28 
// Printable names for task status values; the array index is the numeric
// status value. Slot 0 only holds a placeholder name.
static const char *ompt_task_status_t_values[] = {
    /* 0 */ "ompt_task_UNDEFINED",
    /* 1 */ "ompt_task_complete",
    /* 2 */ "ompt_task_yield",
    /* 3 */ "ompt_task_cancel",
    /* 4 */ "ompt_task_detach",
    /* 5 */ "ompt_task_early_fulfill",
    /* 6 */ "ompt_task_late_fulfill",
    /* 7 */ "ompt_task_switch"};
// Printable names for the cancel flags. This table is addressed by position,
// not by flag value: on_ompt_callback_cancel() uses entries 0-3 for the first
// name it prints and entries 4-6 for the second one.
static const char *ompt_cancel_flag_t_values[] = {
    /* 0 */ "ompt_cancel_parallel",
    /* 1 */ "ompt_cancel_sections",
    /* 2 */ "ompt_cancel_loop",
    /* 3 */ "ompt_cancel_taskgroup",
    /* 4 */ "ompt_cancel_activated",
    /* 5 */ "ompt_cancel_detected",
    /* 6 */ "ompt_cancel_discarded_task"};
48 
// Printable names for dependence types; the array index is the numeric value
// of the dependence type. Slot 0 only holds a placeholder name.
static const char *ompt_dependence_type_t_values[] = {
    /* 0 */ "ompt_dependence_type_UNDEFINED",
    /* 1 */ "ompt_dependence_type_in",
    /* 2 */ "ompt_dependence_type_out",
    /* 3 */ "ompt_dependence_type_inout",
    /* 4 */ "ompt_dependence_type_mutexinoutset",
    /* 5 */ "ompt_dependence_type_source",
    /* 6 */ "ompt_dependence_type_sink",
    /* 7 */ "ompt_dependence_type_inoutset"};
59 
format_task_type(int type,char * buffer)60 static void format_task_type(int type, char *buffer) {
61   char *progress = buffer;
62   if (type & ompt_task_initial)
63     progress += sprintf(progress, "ompt_task_initial");
64   if (type & ompt_task_implicit)
65     progress += sprintf(progress, "ompt_task_implicit");
66   if (type & ompt_task_explicit)
67     progress += sprintf(progress, "ompt_task_explicit");
68   if (type & ompt_task_target)
69     progress += sprintf(progress, "ompt_task_target");
70   if (type & ompt_task_undeferred)
71     progress += sprintf(progress, "|ompt_task_undeferred");
72   if (type & ompt_task_untied)
73     progress += sprintf(progress, "|ompt_task_untied");
74   if (type & ompt_task_final)
75     progress += sprintf(progress, "|ompt_task_final");
76   if (type & ompt_task_mergeable)
77     progress += sprintf(progress, "|ompt_task_mergeable");
78   if (type & ompt_task_merged)
79     progress += sprintf(progress, "|ompt_task_merged");
80 }
81 
82 static ompt_set_callback_t ompt_set_callback;
83 static ompt_get_callback_t ompt_get_callback;
84 static ompt_get_state_t ompt_get_state;
85 static ompt_get_task_info_t ompt_get_task_info;
86 static ompt_get_task_memory_t ompt_get_task_memory;
87 static ompt_get_thread_data_t ompt_get_thread_data;
88 static ompt_get_parallel_info_t ompt_get_parallel_info;
89 static ompt_get_unique_id_t ompt_get_unique_id;
90 static ompt_finalize_tool_t ompt_finalize_tool;
91 static ompt_get_num_procs_t ompt_get_num_procs;
92 static ompt_get_num_places_t ompt_get_num_places;
93 static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
94 static ompt_get_place_num_t ompt_get_place_num;
95 static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
96 static ompt_get_proc_id_t ompt_get_proc_id;
97 static ompt_enumerate_states_t ompt_enumerate_states;
98 static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
99 
print_ids(int level)100 static void print_ids(int level)
101 {
102   int task_type, thread_num;
103   ompt_frame_t *frame;
104   ompt_data_t *task_parallel_data;
105   ompt_data_t *task_data;
106   int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
107                                        &task_parallel_data, &thread_num);
108   char buffer[2048];
109   format_task_type(task_type, buffer);
110   if (frame)
111     printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
112            ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
113            "task_type=%s=%d, thread_num=%d\n",
114            ompt_get_thread_data()->value, level,
115            exists_task ? task_parallel_data->value : 0,
116            exists_task ? task_data->value : 0, frame->exit_frame.ptr,
117            frame->enter_frame.ptr, buffer, task_type, thread_num);
118 }
119 
// Single place that names the builtin used to obtain a frame address.
#define get_frame_address(level) __builtin_frame_address(level)

// Print "<thread id>: __builtin_frame_address(<level>)=<address>".
#define print_frame(level)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value, level, get_frame_address(level))

// print_frame_from_outlined_fn(level) is only provided to tests that define
// TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN. clang (version 5.0 and above) adds
// an intermediate function call when compiling with the debug flag (-g), so
// with DEBUG defined and such a clang the frame one level further up is
// printed instead.
#ifdef TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
#if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
#define print_frame_from_outlined_fn(level) print_frame(level + 1)
#else
#define print_frame_from_outlined_fn(level) print_frame(level)
#endif

// Remind users of an affected clang that DEBUG has to match the use of -g.
#if defined(__clang__) && __clang_major__ >= 5
#warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
#warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
#endif
#endif
139 
// define_ompt_label(id) puts the label ompt_label_<id> at the current position
// so that the address of this code location can be taken.
//
// When used through print_current_address(), the offset between the label and
// the actual return address has to be known. A NOP instruction is therefore
// emitted as the jump target; otherwise the compiler would insert an
// instruction that we can't control. The length of the NOP depends on the
// target and is explained at print_possible_return_addresses().
//
// (The empty block between "#pragma omp ..." and the __asm__ statement is a
// workaround for a bug in the Intel Compiler.)
#define define_ompt_label(id)                                                  \
  {}                                                                           \
  __asm__("nop");                                                              \
  ompt_label_##id:

// get_ompt_label_address(id) yields the address of the label created by
// define_ompt_label(id). It relies on a GNU extension (&&label) that has been
// tested with gcc, clang and icc.
#define get_ompt_label_address(id) (&&ompt_label_##id)

// print_current_address(id) defines the label and prints the exact address
// that the previously called runtime function returns to.
#define print_current_address(id)                                              \
  define_ompt_label(id)                                                        \
  print_possible_return_addresses(get_ompt_label_address(id))
166 
167 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
168 // On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
169 // a MOV instruction for non-void runtime functions which is 3 bytes long.
170 #define print_possible_return_addresses(addr) \
171   printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
172          ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
173 #elif KMP_ARCH_PPC64
174 // On Power the NOP instruction is 4 bytes long. In addition, the compiler
175 // inserts a second NOP instruction (another 4 bytes). For non-void runtime
176 // functions Clang inserts a STW instruction (but only if compiling under
177 // -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
178 #define print_possible_return_addresses(addr) \
179   printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
180          ((char *)addr) - 8, ((char *)addr) - 12)
181 #elif KMP_ARCH_AARCH64
182 // On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
183 // store instruction (another 4 bytes long).
184 #define print_possible_return_addresses(addr) \
185   printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
186          ((char *)addr) - 4, ((char *)addr) - 8)
187 #elif KMP_ARCH_RISCV64
188 #if __riscv_compressed
189 // On RV64GC the C.NOP instruction is 2 byte long. In addition, the compiler
190 // inserts a J instruction (targeting the successor basic block), which
191 // accounts for another 4 bytes. Finally, an additional J instruction may
192 // appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (ie.
193 // another branch).
194 #define print_possible_return_addresses(addr) \
195   printf("%" PRIu64 ": current_address=%p or %p\n", \
196          ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
197 #else
198 // On RV64G the NOP instruction is 4 byte long. In addition, the compiler
199 // inserts a J instruction (targeting the successor basic block), which
200 // accounts for another 4 bytes. Finally, an additional J instruction may
201 // appear (adding 4 more bytes) when the NOP is referenced elsewhere (ie.
202 // another branch).
203 #define print_possible_return_addresses(addr) \
204   printf("%" PRIu64 ": current_address=%p or %p\n", \
205          ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
206 #endif
207 #else
208 #error Unsupported target architecture, cannot determine address offset!
209 #endif
210 
211 
// print_fuzzy_address(id) performs a somewhat similar job to
// print_current_address(), except that it discards a certain number of nibbles
// from the address and only prints the most significant bits / nibbles. This
// can be used for cases where the return address can only be approximated.
//
// To account for overflows (i.e. the most significant bits / nibbles have just
// changed because the address is a few bytes away from a block boundary), four
// consecutive block numbers are printed: the one below the block of the
// label, the block of the label itself, and the two blocks above it. The
// unmodified address is appended in parentheses.
#define print_fuzzy_address(id)                                                \
  define_ompt_label(id)                                                        \
  print_fuzzy_address_blocks(get_ompt_label_address(id))

// If you change this define you need to adapt all capture patterns in the tests
// to include or discard the new number of nibbles!
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))

// The pointer is converted through uintptr_t before being widened to 64 bits:
// a direct pointer-to-uint64_t cast has implementation-defined results where
// pointers are narrower than 64 bits (GCC sign-extends), which would print
// block numbers that cannot match addresses printed with %p.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES - 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES,          \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 2,      \
         (addr))
236 
// register_callback_t(name, type) registers the handler on_<name> for the
// event <name> through ompt_set_callback(). The handler is first assigned to a
// variable of the given callback type, so a handler with a mismatching
// signature is diagnosed by the compiler. A line starting with "0:" is printed
// if the runtime answers ompt_set_never.
#define register_callback_t(name, type)                                        \
  do {                                                                         \
    type f_##name = &on_##name;                                                \
    if (ompt_set_never ==                                                      \
        ompt_set_callback(name, (ompt_callback_t)f_##name)) {                  \
      printf("0: Could not register callback '" #name "'\n");                  \
    }                                                                          \
  } while (0)

// Shorthand for events whose callback type is called <name>_t.
#define register_callback(name) register_callback_t(name, name##_t)
245 
246 #ifndef USE_PRIVATE_TOOL
247 static void
on_ompt_callback_mutex_acquire(ompt_mutex_t kind,unsigned int hint,unsigned int impl,ompt_wait_id_t wait_id,const void * codeptr_ra)248 on_ompt_callback_mutex_acquire(
249   ompt_mutex_t kind,
250   unsigned int hint,
251   unsigned int impl,
252   ompt_wait_id_t wait_id,
253   const void *codeptr_ra)
254 {
255   switch(kind)
256   {
257     case ompt_mutex_lock:
258       printf("%" PRIu64 ":" _TOOL_PREFIX
259              " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
260              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
261              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
262       break;
263     case ompt_mutex_nest_lock:
264       printf("%" PRIu64 ":" _TOOL_PREFIX
265              " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
266              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
267              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
268       break;
269     case ompt_mutex_critical:
270       printf("%" PRIu64 ":" _TOOL_PREFIX
271              " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
272              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
273              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
274       break;
275     case ompt_mutex_atomic:
276       printf("%" PRIu64 ":" _TOOL_PREFIX
277              " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
278              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
279              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
280       break;
281     case ompt_mutex_ordered:
282       printf("%" PRIu64 ":" _TOOL_PREFIX
283              " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
284              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
285              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
286       break;
287     default:
288       break;
289   }
290 }
291 
292 static void
on_ompt_callback_mutex_acquired(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)293 on_ompt_callback_mutex_acquired(
294   ompt_mutex_t kind,
295   ompt_wait_id_t wait_id,
296   const void *codeptr_ra)
297 {
298   switch(kind)
299   {
300     case ompt_mutex_lock:
301       printf("%" PRIu64 ":" _TOOL_PREFIX
302              " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
303              ompt_get_thread_data()->value, wait_id, codeptr_ra);
304       break;
305     case ompt_mutex_nest_lock:
306       printf("%" PRIu64 ":" _TOOL_PREFIX
307              " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
308              ", codeptr_ra=%p \n",
309              ompt_get_thread_data()->value, wait_id, codeptr_ra);
310       break;
311     case ompt_mutex_critical:
312       printf("%" PRIu64 ":" _TOOL_PREFIX
313              " ompt_event_acquired_critical: wait_id=%" PRIu64
314              ", codeptr_ra=%p \n",
315              ompt_get_thread_data()->value, wait_id, codeptr_ra);
316       break;
317     case ompt_mutex_atomic:
318       printf("%" PRIu64 ":" _TOOL_PREFIX
319              " ompt_event_acquired_atomic: wait_id=%" PRIu64
320              ", codeptr_ra=%p \n",
321              ompt_get_thread_data()->value, wait_id, codeptr_ra);
322       break;
323     case ompt_mutex_ordered:
324       printf("%" PRIu64 ":" _TOOL_PREFIX
325              " ompt_event_acquired_ordered: wait_id=%" PRIu64
326              ", codeptr_ra=%p \n",
327              ompt_get_thread_data()->value, wait_id, codeptr_ra);
328       break;
329     default:
330       break;
331   }
332 }
333 
334 static void
on_ompt_callback_mutex_released(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)335 on_ompt_callback_mutex_released(
336   ompt_mutex_t kind,
337   ompt_wait_id_t wait_id,
338   const void *codeptr_ra)
339 {
340   switch(kind)
341   {
342     case ompt_mutex_lock:
343       printf("%" PRIu64 ":" _TOOL_PREFIX
344              " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
345              ompt_get_thread_data()->value, wait_id, codeptr_ra);
346       break;
347     case ompt_mutex_nest_lock:
348       printf("%" PRIu64 ":" _TOOL_PREFIX
349              " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
350              ", codeptr_ra=%p \n",
351              ompt_get_thread_data()->value, wait_id, codeptr_ra);
352       break;
353     case ompt_mutex_critical:
354       printf("%" PRIu64 ":" _TOOL_PREFIX
355              " ompt_event_release_critical: wait_id=%" PRIu64
356              ", codeptr_ra=%p \n",
357              ompt_get_thread_data()->value, wait_id, codeptr_ra);
358       break;
359     case ompt_mutex_atomic:
360       printf("%" PRIu64 ":" _TOOL_PREFIX
361              " ompt_event_release_atomic: wait_id=%" PRIu64
362              ", codeptr_ra=%p \n",
363              ompt_get_thread_data()->value, wait_id, codeptr_ra);
364       break;
365     case ompt_mutex_ordered:
366       printf("%" PRIu64 ":" _TOOL_PREFIX
367              " ompt_event_release_ordered: wait_id=%" PRIu64
368              ", codeptr_ra=%p \n",
369              ompt_get_thread_data()->value, wait_id, codeptr_ra);
370       break;
371     default:
372       break;
373   }
374 }
375 
376 static void
on_ompt_callback_nest_lock(ompt_scope_endpoint_t endpoint,ompt_wait_id_t wait_id,const void * codeptr_ra)377 on_ompt_callback_nest_lock(
378     ompt_scope_endpoint_t endpoint,
379     ompt_wait_id_t wait_id,
380     const void *codeptr_ra)
381 {
382   switch(endpoint)
383   {
384     case ompt_scope_begin:
385       printf("%" PRIu64 ":" _TOOL_PREFIX
386              " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
387              ", codeptr_ra=%p \n",
388              ompt_get_thread_data()->value, wait_id, codeptr_ra);
389       break;
390     case ompt_scope_end:
391       printf("%" PRIu64 ":" _TOOL_PREFIX
392              " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
393              ", codeptr_ra=%p \n",
394              ompt_get_thread_data()->value, wait_id, codeptr_ra);
395       break;
396     case ompt_scope_beginend:
397       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
398       exit(-1);
399   }
400 }
401 
402 static void
on_ompt_callback_sync_region(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)403 on_ompt_callback_sync_region(
404   ompt_sync_region_t kind,
405   ompt_scope_endpoint_t endpoint,
406   ompt_data_t *parallel_data,
407   ompt_data_t *task_data,
408   const void *codeptr_ra)
409 {
410   switch(endpoint)
411   {
412     case ompt_scope_begin:
413       switch(kind)
414       {
415         case ompt_sync_region_barrier:
416         case ompt_sync_region_barrier_implicit:
417         case ompt_sync_region_barrier_implicit_workshare:
418         case ompt_sync_region_barrier_implicit_parallel:
419         case ompt_sync_region_barrier_teams:
420         case ompt_sync_region_barrier_explicit:
421         case ompt_sync_region_barrier_implementation:
422           printf("%" PRIu64 ":" _TOOL_PREFIX
423                  " ompt_event_barrier_begin: parallel_id=%" PRIu64
424                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
425                  ompt_get_thread_data()->value, parallel_data->value,
426                  task_data->value, codeptr_ra);
427           print_ids(0);
428           break;
429         case ompt_sync_region_taskwait:
430           printf("%" PRIu64 ":" _TOOL_PREFIX
431                  " ompt_event_taskwait_begin: parallel_id=%" PRIu64
432                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
433                  ompt_get_thread_data()->value, parallel_data->value,
434                  task_data->value, codeptr_ra);
435           break;
436         case ompt_sync_region_taskgroup:
437           printf("%" PRIu64 ":" _TOOL_PREFIX
438                  " ompt_event_taskgroup_begin: parallel_id=%" PRIu64
439                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
440                  ompt_get_thread_data()->value, parallel_data->value,
441                  task_data->value, codeptr_ra);
442           break;
443         case ompt_sync_region_reduction:
444           printf("ompt_sync_region_reduction should never be passed to "
445                  "on_ompt_callback_sync_region\n");
446           exit(-1);
447           break;
448       }
449       break;
450     case ompt_scope_end:
451       switch(kind)
452       {
453         case ompt_sync_region_barrier:
454         case ompt_sync_region_barrier_implicit:
455         case ompt_sync_region_barrier_explicit:
456         case ompt_sync_region_barrier_implicit_workshare:
457         case ompt_sync_region_barrier_implicit_parallel:
458         case ompt_sync_region_barrier_teams:
459         case ompt_sync_region_barrier_implementation:
460           printf("%" PRIu64 ":" _TOOL_PREFIX
461                  " ompt_event_barrier_end: parallel_id=%" PRIu64
462                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
463                  ompt_get_thread_data()->value,
464                  (parallel_data) ? parallel_data->value : 0, task_data->value,
465                  codeptr_ra);
466           break;
467         case ompt_sync_region_taskwait:
468           printf("%" PRIu64 ":" _TOOL_PREFIX
469                  " ompt_event_taskwait_end: parallel_id=%" PRIu64
470                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
471                  ompt_get_thread_data()->value,
472                  (parallel_data) ? parallel_data->value : 0, task_data->value,
473                  codeptr_ra);
474           break;
475         case ompt_sync_region_taskgroup:
476           printf("%" PRIu64 ":" _TOOL_PREFIX
477                  " ompt_event_taskgroup_end: parallel_id=%" PRIu64
478                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
479                  ompt_get_thread_data()->value,
480                  (parallel_data) ? parallel_data->value : 0, task_data->value,
481                  codeptr_ra);
482           break;
483         case ompt_sync_region_reduction:
484           printf("ompt_sync_region_reduction should never be passed to "
485                  "on_ompt_callback_sync_region\n");
486           exit(-1);
487           break;
488       }
489       break;
490     case ompt_scope_beginend:
491       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
492       exit(-1);
493   }
494 }
495 
496 static void
on_ompt_callback_sync_region_wait(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)497 on_ompt_callback_sync_region_wait(
498   ompt_sync_region_t kind,
499   ompt_scope_endpoint_t endpoint,
500   ompt_data_t *parallel_data,
501   ompt_data_t *task_data,
502   const void *codeptr_ra)
503 {
504   switch(endpoint)
505   {
506     case ompt_scope_begin:
507       switch(kind)
508       {
509         case ompt_sync_region_barrier:
510         case ompt_sync_region_barrier_implicit:
511         case ompt_sync_region_barrier_implicit_workshare:
512         case ompt_sync_region_barrier_implicit_parallel:
513         case ompt_sync_region_barrier_teams:
514         case ompt_sync_region_barrier_explicit:
515         case ompt_sync_region_barrier_implementation:
516           printf("%" PRIu64 ":" _TOOL_PREFIX
517                  " ompt_event_wait_barrier_begin: parallel_id=%" PRIu64
518                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
519                  ompt_get_thread_data()->value, parallel_data->value,
520                  task_data->value, codeptr_ra);
521           break;
522         case ompt_sync_region_taskwait:
523           printf("%" PRIu64 ":" _TOOL_PREFIX
524                  " ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64
525                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
526                  ompt_get_thread_data()->value, parallel_data->value,
527                  task_data->value, codeptr_ra);
528           break;
529         case ompt_sync_region_taskgroup:
530           printf("%" PRIu64 ":" _TOOL_PREFIX
531                  " ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64
532                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
533                  ompt_get_thread_data()->value, parallel_data->value,
534                  task_data->value, codeptr_ra);
535           break;
536         case ompt_sync_region_reduction:
537           printf("ompt_sync_region_reduction should never be passed to "
538                  "on_ompt_callback_sync_region_wait\n");
539           exit(-1);
540           break;
541       }
542       break;
543     case ompt_scope_end:
544       switch(kind)
545       {
546         case ompt_sync_region_barrier:
547         case ompt_sync_region_barrier_implicit:
548         case ompt_sync_region_barrier_implicit_workshare:
549         case ompt_sync_region_barrier_implicit_parallel:
550         case ompt_sync_region_barrier_teams:
551         case ompt_sync_region_barrier_explicit:
552         case ompt_sync_region_barrier_implementation:
553           printf("%" PRIu64 ":" _TOOL_PREFIX
554                  " ompt_event_wait_barrier_end: parallel_id=%" PRIu64
555                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
556                  ompt_get_thread_data()->value,
557                  (parallel_data) ? parallel_data->value : 0, task_data->value,
558                  codeptr_ra);
559           break;
560         case ompt_sync_region_taskwait:
561           printf("%" PRIu64 ":" _TOOL_PREFIX
562                  " ompt_event_wait_taskwait_end: parallel_id=%" PRIu64
563                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
564                  ompt_get_thread_data()->value,
565                  (parallel_data) ? parallel_data->value : 0, task_data->value,
566                  codeptr_ra);
567           break;
568         case ompt_sync_region_taskgroup:
569           printf("%" PRIu64 ":" _TOOL_PREFIX
570                  " ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64
571                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
572                  ompt_get_thread_data()->value,
573                  (parallel_data) ? parallel_data->value : 0, task_data->value,
574                  codeptr_ra);
575           break;
576         case ompt_sync_region_reduction:
577           printf("ompt_sync_region_reduction should never be passed to "
578                  "on_ompt_callback_sync_region_wait\n");
579           exit(-1);
580           break;
581       }
582       break;
583     case ompt_scope_beginend:
584       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
585       exit(-1);
586   }
587 }
588 
// reduction callback: prints "ompt_event_reduction_begin" at scope begin and
// "ompt_event_reduction_end" at scope end, with parallel id (0 when
// parallel_data is NULL), task id and code pointer. `kind` is not inspected.
// ompt_scope_beginend is treated as fatal: a message is printed and the
// process exits with -1.
static void on_ompt_callback_reduction(ompt_sync_region_t kind,
                                       ompt_scope_endpoint_t endpoint,
                                       ompt_data_t *parallel_data,
                                       ompt_data_t *task_data,
                                       const void *codeptr_ra) {
  if (endpoint == ompt_scope_begin) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_reduction_begin: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value,
           (parallel_data) ? parallel_data->value : 0, task_data->value,
           codeptr_ra);
  } else if (endpoint == ompt_scope_end) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_reduction_end: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value,
           (parallel_data) ? parallel_data->value : 0, task_data->value,
           codeptr_ra);
  } else if (endpoint == ompt_scope_beginend) {
    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
    exit(-1);
  }
}
616 
617 static void
on_ompt_callback_flush(ompt_data_t * thread_data,const void * codeptr_ra)618 on_ompt_callback_flush(
619     ompt_data_t *thread_data,
620     const void *codeptr_ra)
621 {
622   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
623          thread_data->value, codeptr_ra);
624 }
625 
626 static void
on_ompt_callback_cancel(ompt_data_t * task_data,int flags,const void * codeptr_ra)627 on_ompt_callback_cancel(
628     ompt_data_t *task_data,
629     int flags,
630     const void *codeptr_ra)
631 {
632   const char* first_flag_value;
633   const char* second_flag_value;
634   if(flags & ompt_cancel_parallel)
635     first_flag_value = ompt_cancel_flag_t_values[0];
636   else if(flags & ompt_cancel_sections)
637     first_flag_value = ompt_cancel_flag_t_values[1];
638   else if(flags & ompt_cancel_loop)
639     first_flag_value = ompt_cancel_flag_t_values[2];
640   else if(flags & ompt_cancel_taskgroup)
641     first_flag_value = ompt_cancel_flag_t_values[3];
642 
643   if(flags & ompt_cancel_activated)
644     second_flag_value = ompt_cancel_flag_t_values[4];
645   else if(flags & ompt_cancel_detected)
646     second_flag_value = ompt_cancel_flag_t_values[5];
647   else if(flags & ompt_cancel_discarded_task)
648     second_flag_value = ompt_cancel_flag_t_values[6];
649 
650   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64
651          ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
652          ompt_get_thread_data()->value, task_data->value, first_flag_value,
653          second_flag_value, flags, codeptr_ra);
654 }
655 
656 static void
on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,unsigned int team_size,unsigned int thread_num,int flags)657 on_ompt_callback_implicit_task(
658     ompt_scope_endpoint_t endpoint,
659     ompt_data_t *parallel_data,
660     ompt_data_t *task_data,
661     unsigned int team_size,
662     unsigned int thread_num,
663     int flags)
664 {
665   switch(endpoint)
666   {
667     case ompt_scope_begin:
668       if(task_data->ptr)
669         printf("%s\n", "0: task_data initially not null");
670       task_data->value = ompt_get_unique_id();
671 
672       //there is no parallel_begin callback for implicit parallel region
673       //thus it is initialized in initial task
674       if(flags & ompt_task_initial)
675       {
676         char buffer[2048];
677 
678         format_task_type(flags, buffer);
679         // Only check initial task not created by teams construct
680         if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
681           printf("%s\n", "0: parallel_data initially not null");
682         parallel_data->value = ompt_get_unique_id();
683         printf("%" PRIu64 ":" _TOOL_PREFIX
684                " ompt_event_initial_task_begin: parallel_id=%" PRIu64
685                ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
686                ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
687                ompt_get_thread_data()->value, parallel_data->value,
688                task_data->value, team_size, thread_num, flags);
689       } else {
690         printf("%" PRIu64 ":" _TOOL_PREFIX
691                " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
692                ", task_id=%" PRIu64 ", team_size=%" PRIu32
693                ", thread_num=%" PRIu32 "\n",
694                ompt_get_thread_data()->value, parallel_data->value,
695                task_data->value, team_size, thread_num);
696       }
697 
698       break;
699     case ompt_scope_end:
700       if(flags & ompt_task_initial){
701         printf("%" PRIu64 ":" _TOOL_PREFIX
702                " ompt_event_initial_task_end: parallel_id=%" PRIu64
703                ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
704                ", index=%" PRIu32 "\n",
705                ompt_get_thread_data()->value,
706                (parallel_data) ? parallel_data->value : 0, task_data->value,
707                team_size, thread_num);
708       } else {
709         printf("%" PRIu64 ":" _TOOL_PREFIX
710                " ompt_event_implicit_task_end: parallel_id=%" PRIu64
711                ", task_id=%" PRIu64 ", team_size=%" PRIu32
712                ", thread_num=%" PRIu32 "\n",
713                ompt_get_thread_data()->value,
714                (parallel_data) ? parallel_data->value : 0, task_data->value,
715                team_size, thread_num);
716       }
717       break;
718     case ompt_scope_beginend:
719       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
720       exit(-1);
721   }
722 }
723 
724 static void
on_ompt_callback_lock_init(ompt_mutex_t kind,unsigned int hint,unsigned int impl,ompt_wait_id_t wait_id,const void * codeptr_ra)725 on_ompt_callback_lock_init(
726   ompt_mutex_t kind,
727   unsigned int hint,
728   unsigned int impl,
729   ompt_wait_id_t wait_id,
730   const void *codeptr_ra)
731 {
732   switch(kind)
733   {
734     case ompt_mutex_lock:
735       printf("%" PRIu64 ":" _TOOL_PREFIX
736              " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
737              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
738              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
739       break;
740     case ompt_mutex_nest_lock:
741       printf("%" PRIu64 ":" _TOOL_PREFIX
742              " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
743              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
744              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
745       break;
746     default:
747       break;
748   }
749 }
750 
751 static void
on_ompt_callback_lock_destroy(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)752 on_ompt_callback_lock_destroy(
753   ompt_mutex_t kind,
754   ompt_wait_id_t wait_id,
755   const void *codeptr_ra)
756 {
757   switch(kind)
758   {
759     case ompt_mutex_lock:
760       printf("%" PRIu64 ":" _TOOL_PREFIX
761              " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
762              ompt_get_thread_data()->value, wait_id, codeptr_ra);
763       break;
764     case ompt_mutex_nest_lock:
765       printf("%" PRIu64 ":" _TOOL_PREFIX
766              " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
767              ", codeptr_ra=%p \n",
768              ompt_get_thread_data()->value, wait_id, codeptr_ra);
769       break;
770     default:
771       break;
772   }
773 }
774 
775 static void
on_ompt_callback_work(ompt_work_t wstype,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,uint64_t count,const void * codeptr_ra)776 on_ompt_callback_work(
777   ompt_work_t wstype,
778   ompt_scope_endpoint_t endpoint,
779   ompt_data_t *parallel_data,
780   ompt_data_t *task_data,
781   uint64_t count,
782   const void *codeptr_ra)
783 {
784   switch(endpoint)
785   {
786     case ompt_scope_begin:
787       switch(wstype)
788       {
789         case ompt_work_loop:
790           printf("%" PRIu64 ":" _TOOL_PREFIX
791                  " ompt_event_loop_begin: parallel_id=%" PRIu64
792                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
793                  "\n",
794                  ompt_get_thread_data()->value, parallel_data->value,
795                  task_data->value, codeptr_ra, count);
796           break;
797         case ompt_work_sections:
798           printf("%" PRIu64 ":" _TOOL_PREFIX
799                  " ompt_event_sections_begin: parallel_id=%" PRIu64
800                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
801                  "\n",
802                  ompt_get_thread_data()->value, parallel_data->value,
803                  task_data->value, codeptr_ra, count);
804           break;
805         case ompt_work_single_executor:
806           printf("%" PRIu64 ":" _TOOL_PREFIX
807                  " ompt_event_single_in_block_begin: parallel_id=%" PRIu64
808                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
809                  "\n",
810                  ompt_get_thread_data()->value, parallel_data->value,
811                  task_data->value, codeptr_ra, count);
812           break;
813         case ompt_work_single_other:
814           printf("%" PRIu64 ":" _TOOL_PREFIX
815                  " ompt_event_single_others_begin: parallel_id=%" PRIu64
816                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
817                  ompt_get_thread_data()->value, parallel_data->value,
818                  task_data->value, codeptr_ra, count);
819           break;
820         case ompt_work_workshare:
821           //impl
822           break;
823         case ompt_work_distribute:
824           printf("%" PRIu64 ":" _TOOL_PREFIX
825                  " ompt_event_distribute_begin: parallel_id=%" PRIu64
826                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
827                  "\n",
828                  ompt_get_thread_data()->value, parallel_data->value,
829                  task_data->value, codeptr_ra, count);
830           break;
831         case ompt_work_taskloop:
832           //impl
833           printf("%" PRIu64 ":" _TOOL_PREFIX
834                  " ompt_event_taskloop_begin: parallel_id=%" PRIu64
835                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
836                  "\n",
837                  ompt_get_thread_data()->value, parallel_data->value,
838                  task_data->value, codeptr_ra, count);
839           break;
840         case ompt_work_scope:
841           printf("%" PRIu64 ":" _TOOL_PREFIX
842                  " ompt_event_scope_begin: parallel_id=%" PRIu64
843                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
844                  "\n",
845                  ompt_get_thread_data()->value, parallel_data->value,
846                  task_data->value, codeptr_ra, count);
847           break;
848       }
849       break;
850     case ompt_scope_end:
851       switch(wstype)
852       {
853         case ompt_work_loop:
854           printf("%" PRIu64 ":" _TOOL_PREFIX
855                  " ompt_event_loop_end: parallel_id=%" PRIu64
856                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
857                  ompt_get_thread_data()->value, parallel_data->value,
858                  task_data->value, codeptr_ra, count);
859           break;
860         case ompt_work_sections:
861           printf("%" PRIu64 ":" _TOOL_PREFIX
862                  " ompt_event_sections_end: parallel_id=%" PRIu64
863                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
864                  ompt_get_thread_data()->value, parallel_data->value,
865                  task_data->value, codeptr_ra, count);
866           break;
867         case ompt_work_single_executor:
868           printf("%" PRIu64 ":" _TOOL_PREFIX
869                  " ompt_event_single_in_block_end: parallel_id=%" PRIu64
870                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
871                  ompt_get_thread_data()->value, parallel_data->value,
872                  task_data->value, codeptr_ra, count);
873           break;
874         case ompt_work_single_other:
875           printf("%" PRIu64 ":" _TOOL_PREFIX
876                  " ompt_event_single_others_end: parallel_id=%" PRIu64
877                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
878                  ompt_get_thread_data()->value, parallel_data->value,
879                  task_data->value, codeptr_ra, count);
880           break;
881         case ompt_work_workshare:
882           //impl
883           break;
884         case ompt_work_distribute:
885           printf("%" PRIu64 ":" _TOOL_PREFIX
886                  " ompt_event_distribute_end: parallel_id=%" PRIu64
887                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
888                  "\n",
889                  ompt_get_thread_data()->value, parallel_data->value,
890                  task_data->value, codeptr_ra, count);
891           break;
892         case ompt_work_taskloop:
893           //impl
894           printf("%" PRIu64 ":" _TOOL_PREFIX
895                  " ompt_event_taskloop_end: parallel_id=%" PRIu64
896                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
897                  "\n",
898                  ompt_get_thread_data()->value, parallel_data->value,
899                  task_data->value, codeptr_ra, count);
900           break;
901         case ompt_work_scope:
902           printf("%" PRIu64 ":" _TOOL_PREFIX
903                  " ompt_event_scope_end: parallel_id=%" PRIu64
904                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
905                  "\n",
906                  ompt_get_thread_data()->value, parallel_data->value,
907                  task_data->value, codeptr_ra, count);
908           break;
909       }
910       break;
911     case ompt_scope_beginend:
912       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
913       exit(-1);
914   }
915 }
916 
// OMPT masked callback: prints a begin or end event line for a masked region.
//   endpoint      - ompt_scope_begin or ompt_scope_end; ompt_scope_beginend
//                   prints a diagnostic and terminates with exit(-1)
//   parallel_data - its value is printed as parallel_id
//   task_data     - its value is printed as task_id
//   codeptr_ra    - printed with %p
static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    const void *codeptr_ra) {
  if (endpoint == ompt_scope_beginend) {
    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
    exit(-1);
  }

  if (endpoint == ompt_scope_begin) {
    printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_masked_begin: "
           "parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value, parallel_data->value,
           task_data->value, codeptr_ra);
  } else if (endpoint == ompt_scope_end) {
    printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_masked_end: "
           "parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value, parallel_data->value,
           task_data->value, codeptr_ra);
  }
}
942 
// OMPT parallel-begin callback: tags the new region with a fresh unique id
// and prints a "parallel" or "teams" begin event line.
//   encountering_task_data  - its value is printed as parent_task_id
//   encountering_task_frame - exit/enter frame pointers are printed
//   parallel_data           - receives the new id; a warning line is printed
//                             first if its ptr member is already non-null
//   requested_team_size     - printed as requested_team_size or
//                             requested_num_teams
//   flag                    - low four bits are printed as the invoker;
//                             the ompt_parallel_team bit selects
//                             "parallel" (set) or "teams" (clear)
//   codeptr_ra              - printed with %p
static void on_ompt_callback_parallel_begin(
    ompt_data_t *encountering_task_data,
    const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
    uint32_t requested_team_size, int flag, const void *codeptr_ra) {
  if (parallel_data->ptr)
    printf("0: parallel_data initially not null\n");
  parallel_data->value = ompt_get_unique_id();

  const int is_parallel_team = (flag & ompt_parallel_team) != 0;
  const char *event;
  const char *size;
  if (is_parallel_team) {
    event = "parallel";
    size = "team_size";
  } else {
    event = "teams";
    size = "num_teams";
  }
  const int invoker = flag & 0xF;

  printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_begin: "
         "parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, "
         "parent_task_frame.reenter=%p, parallel_id=%" PRIu64
         ", requested_%s=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n",
         ompt_get_thread_data()->value, event, encountering_task_data->value,
         encountering_task_frame->exit_frame.ptr,
         encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
         requested_team_size, codeptr_ra, invoker);
}
963 
// OMPT parallel-end callback: prints a "parallel" or "teams" end event line.
//   parallel_data          - its value is printed as parallel_id
//   encountering_task_data - its value is printed as task_id
//   flag                   - low four bits are printed as the invoker; the
//                            ompt_parallel_team bit selects "parallel" (set)
//                            or "teams" (clear)
//   codeptr_ra             - printed with %p
static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
                                          ompt_data_t *encountering_task_data,
                                          int flag, const void *codeptr_ra) {
  const char *event;
  if (flag & ompt_parallel_team)
    event = "parallel";
  else
    event = "teams";
  const int invoker = flag & 0xF;

  printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: "
         "parallel_id=%" PRIu64 ", task_id=%" PRIu64
         ", invoker=%d, codeptr_ra=%p\n",
         ompt_get_thread_data()->value, event, parallel_data->value,
         encountering_task_data->value, invoker, codeptr_ra);
}
974 
975 static void
on_ompt_callback_task_create(ompt_data_t * encountering_task_data,const ompt_frame_t * encountering_task_frame,ompt_data_t * new_task_data,int type,int has_dependences,const void * codeptr_ra)976 on_ompt_callback_task_create(
977     ompt_data_t *encountering_task_data,
978     const ompt_frame_t *encountering_task_frame,
979     ompt_data_t* new_task_data,
980     int type,
981     int has_dependences,
982     const void *codeptr_ra)
983 {
984   if(new_task_data->ptr)
985     printf("0: new_task_data initially not null\n");
986   new_task_data->value = ompt_get_unique_id();
987   char buffer[2048];
988 
989   format_task_type(type, buffer);
990 
991   printf(
992       "%" PRIu64 ":" _TOOL_PREFIX
993       " ompt_event_task_create: parent_task_id=%" PRIu64
994       ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
995       "new_task_id=%" PRIu64
996       ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
997       ompt_get_thread_data()->value,
998       encountering_task_data ? encountering_task_data->value : 0,
999       encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
1000       encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
1001       new_task_data->value, codeptr_ra, buffer, type,
1002       has_dependences ? "yes" : "no");
1003 }
1004 
1005 static void
on_ompt_callback_task_schedule(ompt_data_t * first_task_data,ompt_task_status_t prior_task_status,ompt_data_t * second_task_data)1006 on_ompt_callback_task_schedule(
1007     ompt_data_t *first_task_data,
1008     ompt_task_status_t prior_task_status,
1009     ompt_data_t *second_task_data)
1010 {
1011   printf("%" PRIu64 ":" _TOOL_PREFIX
1012          " ompt_event_task_schedule: first_task_id=%" PRIu64
1013          ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
1014          ompt_get_thread_data()->value, first_task_data->value,
1015          (second_task_data ? second_task_data->value : -1),
1016          ompt_task_status_t_values[prior_task_status], prior_task_status);
1017   if (prior_task_status == ompt_task_complete ||
1018       prior_task_status == ompt_task_late_fulfill) {
1019     printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64
1020            "\n", ompt_get_thread_data()->value, first_task_data->value);
1021   }
1022 }
1023 
1024 static void
on_ompt_callback_dependences(ompt_data_t * task_data,const ompt_dependence_t * deps,int ndeps)1025 on_ompt_callback_dependences(
1026   ompt_data_t *task_data,
1027   const ompt_dependence_t *deps,
1028   int ndeps)
1029 {
1030   char buffer[2048];
1031   char *progress = buffer;
1032   for (int i = 0; i < ndeps && progress < buffer + 2000; i++) {
1033     if (deps[i].dependence_type == ompt_dependence_type_source ||
1034         deps[i].dependence_type == ompt_dependence_type_sink)
1035       progress +=
1036           sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
1037                   ompt_dependence_type_t_values[deps[i].dependence_type]);
1038     else
1039       progress +=
1040           sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
1041                   ompt_dependence_type_t_values[deps[i].dependence_type]);
1042   }
1043   if (ndeps > 0)
1044     progress[-2] = 0;
1045   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
1046          ", deps=[%s], ndeps=%d\n",
1047          ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
1048 }
1049 
1050 static void
on_ompt_callback_task_dependence(ompt_data_t * first_task_data,ompt_data_t * second_task_data)1051 on_ompt_callback_task_dependence(
1052   ompt_data_t *first_task_data,
1053   ompt_data_t *second_task_data)
1054 {
1055   printf("%" PRIu64 ":" _TOOL_PREFIX
1056          " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
1057          ", second_task_id=%" PRIu64 "\n",
1058          ompt_get_thread_data()->value, first_task_data->value,
1059          second_task_data->value);
1060 }
1061 
1062 static void
on_ompt_callback_thread_begin(ompt_thread_t thread_type,ompt_data_t * thread_data)1063 on_ompt_callback_thread_begin(
1064   ompt_thread_t thread_type,
1065   ompt_data_t *thread_data)
1066 {
1067   if(thread_data->ptr)
1068     printf("%s\n", "0: thread_data initially not null");
1069   thread_data->value = ompt_get_unique_id();
1070   printf("%" PRIu64 ":" _TOOL_PREFIX
1071          " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
1072          ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
1073          thread_type, thread_data->value);
1074 }
1075 
1076 static void
on_ompt_callback_thread_end(ompt_data_t * thread_data)1077 on_ompt_callback_thread_end(
1078   ompt_data_t *thread_data)
1079 {
1080   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64
1081          "\n",
1082          ompt_get_thread_data()->value, thread_data->value);
1083 }
1084 
1085 static int
on_ompt_callback_control_tool(uint64_t command,uint64_t modifier,void * arg,const void * codeptr_ra)1086 on_ompt_callback_control_tool(
1087   uint64_t command,
1088   uint64_t modifier,
1089   void *arg,
1090   const void *codeptr_ra)
1091 {
1092   ompt_frame_t* omptTaskFrame;
1093   ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
1094   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_control_tool: command=%" PRIu64
1095          ", modifier=%" PRIu64
1096          ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
1097          "current_task_frame.reenter=%p \n",
1098          ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
1099          omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
1100 
1101   // the following would interfere with expected output for OMPT tests, so skip
1102 #ifndef _OMPT_TESTS
1103   // print task data
1104   int task_level = 0;
1105   ompt_data_t *task_data;
1106   while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
1107                             NULL, NULL)) {
1108     printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n",
1109            ompt_get_thread_data()->value, task_level, task_data->value);
1110     task_level++;
1111   }
1112 
1113   // print parallel data
1114   int parallel_level = 0;
1115   ompt_data_t *parallel_data;
1116   while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)&parallel_data,
1117                                 NULL)) {
1118     printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64
1119            "\n",
1120            ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1121     parallel_level++;
1122   }
1123 #endif
1124   return 0; //success
1125 }
1126 
// Tool initializer handed to the runtime through ompt_start_tool.
// Resolves the OMPT entry points used by this file through `lookup`, stores
// them in the file-level function pointers, registers all callbacks and
// prints the platform's textual representation of a NULL pointer.
//   lookup             - resolves an entry point by name
//   initial_device_num - unused
//   tool_data          - unused
// Always returns 1 (success).
int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num,
                    ompt_data_t *tool_data) {
  (void)initial_device_num;
  (void)tool_data;

  // Core inquiry functions.
  ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
  ompt_get_callback = (ompt_get_callback_t)lookup("ompt_get_callback");
  ompt_get_state = (ompt_get_state_t)lookup("ompt_get_state");
  ompt_get_task_info = (ompt_get_task_info_t)lookup("ompt_get_task_info");
  ompt_get_task_memory =
      (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
  ompt_get_thread_data =
      (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
  ompt_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
  ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
  ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");

  // Draw one id and discard it (the result is intentionally unused).
  ompt_get_unique_id();

  // Processor, place and enumeration inquiry functions.
  ompt_get_num_procs = (ompt_get_num_procs_t)lookup("ompt_get_num_procs");
  ompt_get_num_places = (ompt_get_num_places_t)lookup("ompt_get_num_places");
  ompt_get_place_proc_ids =
      (ompt_get_place_proc_ids_t)lookup("ompt_get_place_proc_ids");
  ompt_get_place_num = (ompt_get_place_num_t)lookup("ompt_get_place_num");
  ompt_get_partition_place_nums =
      (ompt_get_partition_place_nums_t)lookup("ompt_get_partition_place_nums");
  ompt_get_proc_id = (ompt_get_proc_id_t)lookup("ompt_get_proc_id");
  ompt_enumerate_states =
      (ompt_enumerate_states_t)lookup("ompt_enumerate_states");
  ompt_enumerate_mutex_impls =
      (ompt_enumerate_mutex_impls_t)lookup("ompt_enumerate_mutex_impls");

  // Mutex and synchronization callbacks.
  register_callback(ompt_callback_mutex_acquire);
  register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
  register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
  register_callback(ompt_callback_nest_lock);
  register_callback(ompt_callback_sync_region);
  register_callback_t(ompt_callback_sync_region_wait,
                      ompt_callback_sync_region_t);
  register_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t);

  // Miscellaneous callbacks.
  register_callback(ompt_callback_control_tool);
  register_callback(ompt_callback_flush);
  register_callback(ompt_callback_cancel);
  register_callback(ompt_callback_implicit_task);
  register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
  register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);

  // Work-sharing, parallel-region, task and thread callbacks.
  register_callback(ompt_callback_work);
  register_callback(ompt_callback_masked);
  register_callback(ompt_callback_parallel_begin);
  register_callback(ompt_callback_parallel_end);
  register_callback(ompt_callback_task_create);
  register_callback(ompt_callback_task_schedule);
  register_callback(ompt_callback_dependences);
  register_callback(ompt_callback_task_dependence);
  register_callback(ompt_callback_thread_begin);
  register_callback(ompt_callback_thread_end);

  printf("0: NULL_POINTER=%p\n", (void*)NULL);
  return 1; //success
}
1179 
// Tool finalizer handed to the runtime through ompt_start_tool: writes the
// runtime-shutdown marker line to stdout.
//   tool_data - unused
void ompt_finalize(ompt_data_t *tool_data) {
  (void)tool_data;
  fputs("0: ompt_event_runtime_shutdown\n", stdout);
}
1184 
1185 #ifdef __cplusplus
1186 extern "C" {
1187 #endif
ompt_start_tool(unsigned int omp_version,const char * runtime_version)1188 ompt_start_tool_result_t* ompt_start_tool(
1189   unsigned int omp_version,
1190   const char *runtime_version)
1191 {
1192   static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
1193   return &ompt_start_tool_result;
1194 }
1195 #ifdef __cplusplus
1196 }
1197 #endif
1198 #endif // ifndef USE_PRIVATE_TOOL
1199 #ifdef _OMPT_TESTS
1200 #undef _OMPT_TESTS
1201 #endif
1202