#if USE_ITT_BUILD
/*
 * kmp_itt.h -- ITT Notify interface.
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

#if KMP_DEBUG
  #define __kmp_inline                 // Turn off inlining in debug mode.
#else
  #define __kmp_inline static inline
#endif

#if USE_ITT_NOTIFY
  extern kmp_int32 __kmp_itt_prepare_delay;
# ifdef __cplusplus
  extern "C" void __kmp_itt_fini_ittlib(void);
# else
  extern void __kmp_itt_fini_ittlib(void);
# endif
#endif

// Simplify the handling of an argument that is only required when
// USE_ITT_BUILD is enabled.
#define USE_ITT_BUILD_ARG(x) ,x

void __kmp_itt_initialize();
void __kmp_itt_destroy();

// -------------------------------------------------------------------------------------------------
// New stuff for reporting high-level constructs.
// -------------------------------------------------------------------------------------------------

// Note the naming convention: a __kmp_itt_xxxing() function should be called
// before the action, while the matching __kmp_itt_xxxed() function should be
// called after the action.

// --- Parallel region reporting ---
__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized = 0 ); // Master only, before forking threads.
__kmp_inline void __kmp_itt_region_joined(  int gtid, int serialized = 0 ); // Master only, after joining threads.
    // (*) Note: A thread may execute tasks after this point, though.
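// Illustrative sketch (kept out of compilation; __kmp_example_wait and its
// parameters are hypothetical, only the macro expansion is real): how
// USE_ITT_BUILD_ARG() lets a declaration carry an extra ITT argument only
// when USE_ITT_BUILD is enabled, avoiding #ifdefs at every call site.
#if 0
// With USE_ITT_BUILD defined, this expands to:
//   void __kmp_example_wait( int gtid, int spin_count, void * itt_sync_obj );
// and without it, to:
//   void __kmp_example_wait( int gtid, int spin_count );
void __kmp_example_wait( int gtid, int spin_count
                         USE_ITT_BUILD_ARG( void * itt_sync_obj ) );
#endif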
// --- Frame reporting ---
// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel
__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region = 0 );

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - whether this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction );
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
// iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk );
__kmp_inline void __kmp_itt_metadata_single( ident_t * loc );

// --- Barrier reporting ---
__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_middle(   int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object );

// --- Taskwait reporting ---
__kmp_inline void * __kmp_itt_taskwait_object( int gtid );
__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object );

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting( void * object );
__kmp_inline void __kmp_itt_task_finished( void * object );

// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating(  kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_lock_creating(  kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_acquired(  kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );

// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating(  kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_critical_creating(  kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_acquired(  kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock );

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start( int gtid );
__kmp_inline void __kmp_itt_single_end(   int gtid );

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init(  int gtid );
__kmp_inline void __kmp_itt_ordered_prep(  int gtid );
__kmp_inline void __kmp_itt_ordered_start( int gtid );
__kmp_inline void __kmp_itt_ordered_end(   int gtid );

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name( int gtid );

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name );
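// Illustrative sketch (kept out of compilation; __kmp_example_barrier_report
// is a made-up name): how the xxxing/xxxed naming convention plays out for
// barrier reporting, with the sync object obtained once and threaded through
// all three phase calls.
#if 0
static void __kmp_example_barrier_report( int gtid, int bt ) {
    // Look up (and optionally name) the sync object for this barrier type.
    void * object = __kmp_itt_barrier_object( gtid, bt );
    __kmp_itt_barrier_starting( gtid, object ); // Before waiting at the barrier.
    /* ... wait for all threads to arrive ... */
    __kmp_itt_barrier_middle( gtid, object );   // Between gather and release phases.
    /* ... wait to be released ... */
    __kmp_itt_barrier_finished( gtid, object ); // After leaving the barrier.
}
#endif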
// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);

// -------------------------------------------------------------------------------------------------
// Old stuff for reporting low-level internal synchronization.
// -------------------------------------------------------------------------------------------------

#if USE_ITT_NOTIFY

/*
 * Support for SSC marks, which are used by SDE
 * http://software.intel.com/en-us/articles/intel-software-development-emulator
 * to mark points in instruction traces that represent spin-loops and are
 * therefore uninteresting when collecting traces for architecture simulation.
 */
#ifndef INCLUDE_SSC_MARKS
# define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* Linux 64 only for now */
#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
// Portable (at least for gcc and icc) code to insert the necessary
// instructions to set %ebx and execute the unlikely no-op.
#if defined( __INTEL_COMPILER )
# define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
# define INSERT_SSC_MARK(tag)                                                 \
    __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx")
#endif
#else
# define INSERT_SSC_MARK(tag) ((void)0)
#endif

/* Markers for the start and end of regions that represent polling and are
 * therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
 * arbitrary numbers that should be unique in the space of SSC tags, but there
 * is no central issuing authority; rather, randomness is expected to work.
 */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END()   INSERT_SSC_MARK(0x4377)

// Markers for architecture simulation.
// FORKING      : Before the master thread forks.
// JOINING      : At the start of the join.
// INVOKING     : Before the threads invoke microtasks.
// DISPATCH_INIT: At the start of dynamically scheduled loop.
// DISPATCH_NEXT: After claiming next iteration of dynamically scheduled loop.
#define SSC_MARK_FORKING()       INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING()       INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING()      INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)

// The object is an address that associates a specific set of the prepare,
// acquire, release, and cancel operations.

/* Sync prepare indicates a thread is going to start waiting for another
   thread to send a release event. This operation should be done just before
   the thread begins checking for the existence of the release event. */

/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it. */

/* Sync acquired indicates a thread has received a release event from another
   thread and has stopped waiting. This operation must occur only after the
   release event is received. */

/* Sync release indicates a thread is going to send a release event to another
   thread so it will stop waiting and continue execution. This operation must
   happen just before the release event. */

#define KMP_FSYNC_PREPARE(   obj ) __itt_fsync_prepare(   (void *)( obj ) )
#define KMP_FSYNC_CANCEL(    obj ) __itt_fsync_cancel(    (void *)( obj ) )
#define KMP_FSYNC_ACQUIRED(  obj ) __itt_fsync_acquired(  (void *)( obj ) )
#define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) )
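// Illustrative sketch (kept out of compilation; `flag` and both functions are
// hypothetical): the basic prepare/acquired pairing on the waiting side and
// the releasing notification on the signalling side. The flag's address
// serves as the sync object that ties the events together.
#if 0
static volatile int flag = 0;

static void example_waiter(void) {
    KMP_FSYNC_PREPARE( &flag );   // Just before checking for the release event.
    while ( flag == 0 ) {
        /* wait (a real spin loop should use the KMP_FSYNC_SPIN_* macros below) */
    }
    KMP_FSYNC_ACQUIRED( &flag );  // Only after the release event is observed.
}

static void example_releaser(void) {
    KMP_FSYNC_RELEASING( &flag ); // Just before sending the release event.
    flag = 1;
}
#endif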
/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be
   called with a delay (and not called at all if the waiting time is small).
   So, in spin loops, do not use KMP_FSYNC_PREPARE(), but use
   KMP_FSYNC_SPIN_INIT() (before the spin loop), KMP_FSYNC_SPIN_PREPARE()
   (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD()
   for an example. */

#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT( obj, spin )                                      \
    int sync_iters = 0;                                                       \
    if ( __itt_fsync_prepare_ptr ) {                                          \
        if ( obj == NULL ) {                                                  \
            obj = spin;                                                       \
        } /* if */                                                            \
    } /* if */                                                                \
    SSC_MARK_SPIN_START()

#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE( obj ) do {                                    \
        if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \
            ++ sync_iters;                                                    \
            if ( sync_iters >= __kmp_itt_prepare_delay ) {                    \
                KMP_FSYNC_PREPARE( (void*) obj );                             \
            } /* if */                                                        \
        } /* if */                                                            \
    } while (0)

#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED( obj ) do {                                   \
        SSC_MARK_SPIN_END();                                                  \
        if ( sync_iters >= __kmp_itt_prepare_delay ) {                        \
            KMP_FSYNC_ACQUIRED( (void*) obj );                                \
        } /* if */                                                            \
    } while (0)

/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
       KMP_ITT_IGNORE(
           ptr = malloc( size );
       );
*/
#define KMP_ITT_IGNORE( statement ) do {                                      \
        __itt_state_t __itt_state_;                                           \
        if ( __itt_state_get_ptr ) {                                          \
            __itt_state_ = __itt_state_get();                                 \
            __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \
        } /* if */                                                            \
        { statement }                                                         \
        if ( __itt_state_get_ptr ) {                                          \
            __itt_state_set( __itt_state_ );                                  \
        } /* if */                                                            \
    } while (0)

const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to
                                       // different OpenMP regions in the user source code).
extern kmp_int32 __kmp_barrier_domain_count;
extern kmp_int32 __kmp_region_domain_count;
extern __itt_domain * __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain * __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain * __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain * metadata_domain;

#else

// Null definitions of the synchronization tracing functions.
# define KMP_FSYNC_PREPARE(   obj )       ((void)0)
# define KMP_FSYNC_CANCEL(    obj )       ((void)0)
# define KMP_FSYNC_ACQUIRED(  obj )       ((void)0)
# define KMP_FSYNC_RELEASING( obj )       ((void)0)
# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj )    ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj )   ((void)0)
# define KMP_ITT_IGNORE( stmt ) do { stmt } while (0)

#endif // USE_ITT_NOTIFY

#if ! KMP_DEBUG
    // In release mode include definitions of inline functions.
    #include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H

#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
# define KMP_FSYNC_PREPARE(   obj )       ((void)0)
# define KMP_FSYNC_CANCEL(    obj )       ((void)0)
# define KMP_FSYNC_ACQUIRED(  obj )       ((void)0)
# define KMP_FSYNC_RELEASING( obj )       ((void)0)
# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj )    ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj )   ((void)0)
# define KMP_ITT_IGNORE( stmt ) do { stmt } while (0)

# define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */
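// Illustrative sketch (kept out of compilation; example_spin_wait and its
// parameters are hypothetical): how the KMP_FSYNC_SPIN_* macros cooperate so
// that the prepare event is reported only after __kmp_itt_prepare_delay
// iterations, mirroring the pattern referenced at KMP_WAIT_YIELD(). Note that
// KMP_FSYNC_SPIN_INIT() declares a local counter, so it must appear where a
// declaration is allowed.
#if 0
static void example_spin_wait( volatile kmp_uint32 * spin, kmp_uint32 checker ) {
    void * obj = NULL;                         // ITT sync object, filled in by INIT.
    KMP_FSYNC_SPIN_INIT( obj, (void *) spin ); // Before the spin loop.
    while ( *spin != checker ) {
        KMP_FSYNC_SPIN_PREPARE( obj );         // Within the loop; delayed prepare.
        /* ... yield / pause ... */
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );            // After the wait completes.
}
#endif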