author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300
---|---|---
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f | (patch)
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 | /contrib/libs/cxxsupp/openmp/kmp_itt.inl
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 | (diff)
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/kmp_itt.inl')
-rw-r--r-- | contrib/libs/cxxsupp/openmp/kmp_itt.inl | 2260 |
1 file changed, 1130 insertions, 1130 deletions
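The patch below repeatedly stores two ITT frame-domain indexes in the 32-bit `ident_t::reserved_2` field: the region-domain index in the low two bytes and the barrier-domain index in the high two bytes, each saved as index + 1 so that zero still means "not set" (which is safe because KMP_MAX_FRAME_DOMAINS = 512). The standalone C++ sketch that follows illustrates only that packing scheme; the helper names and the `main` driver are illustrative assumptions, not part of the OpenMP runtime.

```cpp
// Minimal sketch (assumed helper names, not runtime API) of the index-packing
// scheme used on ident_t::reserved_2 in the patch below: region-domain index
// in the low two bytes, barrier-domain index in the high two bytes, both
// stored as index + 1 so that 0 keeps meaning "no domain created yet".
#include <cassert>
#include <cstdint>

enum { KMP_MAX_FRAME_DOMAINS = 512 }; // small enough to fit in 16 bits

static void pack_region_index(std::int32_t &reserved_2, int frm) {
    assert(frm >= 0 && frm < KMP_MAX_FRAME_DOMAINS);
    reserved_2 |= (frm + 1);              // low two bytes
}

static void pack_barrier_index(std::int32_t &reserved_2, int frm) {
    assert(frm >= 0 && frm < KMP_MAX_FRAME_DOMAINS);
    reserved_2 |= (frm + 1) << 16;        // high two bytes
}

static int unpack_region_index(std::int32_t reserved_2) {
    return (reserved_2 & 0x0000FFFF) - 1; // -1 means "not set"
}

static int unpack_barrier_index(std::int32_t reserved_2) {
    return (reserved_2 >> 16) - 1;        // -1 means "not set"
}

int main() {
    std::int32_t reserved_2 = 0;          // reserved_2 is assumed to start at 0
    pack_region_index(reserved_2, 7);
    pack_barrier_index(reserved_2, 3);
    assert(unpack_region_index(reserved_2) == 7);
    assert(unpack_barrier_index(reserved_2) == 3);
    return 0;
}
```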
diff --git a/contrib/libs/cxxsupp/openmp/kmp_itt.inl b/contrib/libs/cxxsupp/openmp/kmp_itt.inl index 6dafa6c16e..625d879840 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_itt.inl +++ b/contrib/libs/cxxsupp/openmp/kmp_itt.inl @@ -1,1130 +1,1130 @@ -#if USE_ITT_BUILD -/* - * kmp_itt.inl -- Inline functions of ITT Notify. - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -// Inline function definitions. This file should be included into kmp_itt.h file for prodiction -// build (to let compliler inline functions) or into kmp_itt.c file for debug build (to reduce -// the number of files to recompile and save build time). - - -#include "kmp.h" -#include "kmp_str.h" - -#if KMP_ITT_DEBUG - extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; - #define KMP_ITT_DEBUG_LOCK() { \ - __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \ - } - #define KMP_ITT_DEBUG_PRINT( ... ) { \ - fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \ - fprintf( stderr, __VA_ARGS__ ); \ - fflush( stderr ); \ - __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \ - } -#else - #define KMP_ITT_DEBUG_LOCK() - #define KMP_ITT_DEBUG_PRINT( ... ) -#endif // KMP_ITT_DEBUG - -// Ensure that the functions are static if they're supposed to be -// being inlined. Otherwise they cannot be used in more than one file, -// since there will be multiple definitions. -#if KMP_DEBUG -# define LINKAGE -#else -# define LINKAGE static inline -#endif - -// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this -// API to support user-defined synchronization primitives, but does not use ZCA; -// it would be safe to turn this off until wider support becomes available. -#if USE_ITT_ZCA -#ifdef __INTEL_COMPILER -# if __INTEL_COMPILER >= 1200 -# undef __itt_sync_acquired -# undef __itt_sync_releasing -# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr) -# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr) -# endif -#endif -#endif - -static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock ); - -/* - ------------------------------------------------------------------------------------------------ - Parallel region reporting. - - * __kmp_itt_region_forking should be called by master thread of a team. Exact moment of - call does not matter, but it should be completed before any thread of this team calls - __kmp_itt_region_starting. - * __kmp_itt_region_starting should be called by each thread of a team just before entering - parallel region body. - * __kmp_itt_region_finished should be called by each thread of a team right after returning - from parallel region body. - * __kmp_itt_region_joined should be called by master thread of a team, after all threads - called __kmp_itt_region_finished. - - Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can execute some more - user code -- such a thread can execute tasks. - - Note: The overhead of logging region_starting and region_finished in each thread is too large, - so these calls are not used. 
- - ------------------------------------------------------------------------------------------------ -*/ - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized ) { -#if USE_ITT_NOTIFY - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - if (team->t.t_active_level + serialized > 1) - { - // The frame notifications are only supported for the outermost teams. - return; - } - ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; - if (loc) { - // Use the reserved_2 field to store the index to the region domain. - // Assume that reserved_2 contains zero initially. Since zero is special - // value here, store the index into domain array increased by 1. - if (loc->reserved_2 == 0) { - if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) { - // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc - //} // AC: this block is to replace next unsynchronized line - - // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 - // field but put region index to the low two bytes and barrier indexes to the high - // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. - loc->reserved_2 |= (frm + 1); // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "<func>$omp$parallel@[file:]<line>[:<col>]" - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - if( barriers ) { - if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - const char * buff = NULL; - buff = __kmp_str_format("%s$omp$barrier@%s:%d", - str_loc.func, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - __kmp_str_free( &buff ); - // Save the barrier frame index to the high two bytes. - loc->reserved_2 |= (frm + 1) << 16; - } - } - __kmp_str_loc_free( &str_loc ); - __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL); - } - } else { // Region domain exists for this location - // Check if team size was changed. 
Then create new region domain for this location - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if( __kmp_itt_region_team_size[frm] != team_size ) { - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); - } else { // Team size was not changed. Use existing domain. - __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); - } - } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, serialized:%d, loc:%p\n", - gtid, loc->reserved_2, serialized, loc ); - } -#endif -} // __kmp_itt_region_forking - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) { -#if USE_ITT_NOTIFY - if( region ) { - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - int serialized = ( region == 2 ? 1 : 0 ); - if (team->t.t_active_level + serialized > 1) - { - // The frame notifications are only supported for the outermost teams. - return; - } - //Check region domain has not been created before. It's index is saved in the low two bytes. - if ((loc->reserved_2 & 0x0000FFFF) == 0) { - if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - - // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 - // field but put region index to the low two bytes and barrier indexes to the high - // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. - loc->reserved_2 |= (frm + 1); // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]" - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } - } else { // Region domain exists for this location - // Check if team size was changed. 
Then create new region domain for this location - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if( __kmp_itt_region_team_size[frm] != team_size ) { - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } else { // Team size was not changed. Use existing domain. - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } - } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n", - gtid, loc->reserved_2, region, loc, begin, end ); - return; - } else { // called for barrier reporting - if (loc) { - if ((loc->reserved_2 & 0xFFFF0000) == 0) { - if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - // Save the barrier frame index to the high two bytes. - loc->reserved_2 |= (frm + 1) << 16; // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "<func>$omp$frame@[file:]<line>[:<col>]" - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - if( imbalance ) { - const char * buff_imb = NULL; - buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", - str_loc.func, team_size, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb ); - __itt_suppress_pop(); - __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end ); - __kmp_str_free( &buff_imb ); - } else { - const char * buff = NULL; - buff = __kmp_str_format("%s$omp$barrier@%s:%d", - str_loc.func, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end ); - __kmp_str_free( &buff ); - } - __kmp_str_loc_free( &str_loc ); - } - } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS - if( imbalance ) { - __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end ); - } else { - __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end ); - } - } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", - gtid, loc->reserved_2, loc, begin, end ); - } - } -#endif -} // __kmp_itt_frame_submit - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) { -#if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( metadata_domain == NULL) { - 
__itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); - } - - __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_imbalance"); - - kmp_uint64 imbalance_data[ 4 ]; - imbalance_data[ 0 ] = begin; - imbalance_data[ 1 ] = end; - imbalance_data[ 2 ] = imbalance; - imbalance_data[ 3 ] = reduction; - - __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 4, imbalance_data); -#endif -} // __kmp_itt_metadata_imbalance - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) { -#if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); - } - - __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_loop"); - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - - kmp_uint64 loop_data[ 5 ]; - loop_data[ 0 ] = str_loc.line; - loop_data[ 1 ] = str_loc.col; - loop_data[ 2 ] = sched_type; - loop_data[ 3 ] = iterations; - loop_data[ 4 ] = chunk; - - __kmp_str_loc_free( &str_loc ); - - __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 5, loop_data); -#endif -} // __kmp_itt_metadata_loop - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_single( ident_t * loc ) { -#if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); - } - - __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_single"); - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - kmp_uint64 single_data[ 2 ]; - single_data[ 0 ] = str_loc.line; - single_data[ 1 ] = str_loc.col; - - __kmp_str_loc_free( &str_loc ); - - __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 2, single_data); -#endif -} // __kmp_itt_metadata_single - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_starting( int gtid ) { -#if USE_ITT_NOTIFY -#endif -} // __kmp_itt_region_starting - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_finished( int gtid ) { -#if USE_ITT_NOTIFY -#endif -} // __kmp_itt_region_finished - -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_joined( int gtid, int serialized ) { -#if USE_ITT_NOTIFY - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - if (team->t.t_active_level + serialized > 1) - { - // The frame notifications are only supported for the outermost teams. 
- return; - } - ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; - if (loc && loc->reserved_2) - { - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if(frm < KMP_MAX_FRAME_DOMAINS) { - KMP_ITT_DEBUG_LOCK(); - __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL); - KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, serialized:%d, loc:%p\n", - gtid, loc->reserved_2, serialized, loc ); - } - } -#endif -} // __kmp_itt_region_joined - -/* - ------------------------------------------------------------------------------------------------ - Barriers reporting. - - A barrier consists of two phases: - - 1. Gather -- master waits for arriving of all the worker threads; each worker thread - registers arrival and goes further. - 2. Release -- each worker threads waits until master lets it go; master lets worker threads - go. - - Function should be called by each thread: - - * __kmp_itt_barrier_starting() -- before arriving to the gather phase. - * __kmp_itt_barrier_middle() -- between gather and release phases. - * __kmp_itt_barrier_finished() -- after release phase. - - Note: Call __kmp_itt_barrier_object() before call to __kmp_itt_barrier_starting() and save - result in local variable. __kmp_itt_barrier_object(), being called too late (e. g. after gather - phase) would return itt sync object for the next barrier! - - ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have - barrier object or barrier data structure. Barrier is just a counter in team and thread - structures. We could use an address of team structure as an barrier sync object, but ITT wants - different objects for different barriers (even whithin the same team). So let us use - team address as barrier sync object for the first barrier, then increase it by one for the next - barrier, and so on (but wrap it not to use addresses outside of team structure). - - ------------------------------------------------------------------------------------------------ -*/ - -void * -__kmp_itt_barrier_object( - int gtid, - int bt, - int set_name, - int delta // 0 (current barrier) is default value; specify -1 to get previous barrier. -) { - void * object = NULL; -#if USE_ITT_NOTIFY - kmp_info_t * thr = __kmp_thread_from_gtid( gtid ); - kmp_team_t * team = thr->th.th_team; - - // NOTE: - // If the function is called from __kmp_fork_barrier, team pointer can be NULL. This "if" - // helps to avoid crash. However, this is not complete solution, and reporting fork/join - // barriers to ITT should be revisited. - - if ( team != NULL ) { - - // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived - // by KMP_BARRIER_STATE_BUMP to get plain barrier counter. - kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta; - // Now form the barrier id. Encode barrier type (bt) in barrier id too, so barriers of - // different types do not have the same ids. - KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier ); - // This conditon is a must (we would have zero divide otherwise). - KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier ); - // More strong condition: make sure we have room at least for for two differtent ids - // (for each barrier type). 
- object = - reinterpret_cast< void * >( - kmp_uintptr_t( team ) - + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier - + bt - ); - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object ); - - if ( set_name ) { - ident_t const * loc = NULL; - char const * src = NULL; - char const * type = "OMP Barrier"; - switch ( bt ) { - case bs_plain_barrier : { - // For plain barrier compiler calls __kmpc_barrier() function, which saves - // location in thr->th.th_ident. - loc = thr->th.th_ident; - // Get the barrier type from flags provided by compiler. - kmp_int32 expl = 0; - kmp_uint32 impl = 0; - if ( loc != NULL ) { - src = loc->psource; - expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0; - impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0; - }; // if - if ( impl ) { - switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) { - case KMP_IDENT_BARRIER_IMPL_FOR : { - type = "OMP For Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_SECTIONS : { - type = "OMP Sections Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_SINGLE : { - type = "OMP Single Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_WORKSHARE : { - type = "OMP Workshare Barrier"; - } break; - default : { - type = "OMP Implicit Barrier"; - KMP_DEBUG_ASSERT( 0 ); - }; - }; /* switch */ - } else if ( expl ) { - type = "OMP Explicit Barrier"; - }; /* if */ - } break; - case bs_forkjoin_barrier : { - // In case of fork/join barrier we can read thr->th.th_ident, because it - // contains location of last passed construct (while join barrier is not - // such one). Use th_ident of master thread instead -- __kmp_join_call() - // called by the master thread saves location. - // - // AC: cannot read from master because __kmp_join_call may be not called - // yet, so we read the location from team. This is the same location. - // And team is valid at the enter to join barrier where this happens. 
- loc = team->t.t_ident; - if ( loc != NULL ) { - src = loc->psource; - }; // if - type = "OMP Join Barrier"; - } break; - }; // switch - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, type, src, __itt_attr_barrier ); - KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src ); - }; // if - - }; // if -#endif - return object; -} // __kmp_itt_barrier_object - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_starting( int gtid, void * object ) { -#if USE_ITT_NOTIFY - if ( !KMP_MASTER_GTID( gtid ) ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_releasing( object ); - KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object ); - }; // if - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object ); -#endif -} // __kmp_itt_barrier_starting - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_middle( int gtid, void * object ) { -#if USE_ITT_NOTIFY - if ( KMP_MASTER_GTID( gtid ) ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_releasing( object ); - KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object ); - } else { - }; // if -#endif -} // __kmp_itt_barrier_middle - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_finished( int gtid, void * object ) { -#if USE_ITT_NOTIFY - if ( KMP_MASTER_GTID( gtid ) ) { - } else { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object ); - }; // if -#endif -} // __kmp_itt_barrier_finished - -/* - ------------------------------------------------------------------------------------------------ - Taskwait reporting. - - ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have taskwait - structure, so we need to construct something. - -*/ - -void * -__kmp_itt_taskwait_object( int gtid ) { - void * object = NULL; -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_info_t * thread = __kmp_thread_from_gtid( gtid ); - kmp_taskdata_t * taskdata = thread -> th.th_current_task; - object = - reinterpret_cast< void * >( - kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t ) - ); - }; // if -#endif - return object; -} // __kmp_itt_taskwait_object - -void -__kmp_itt_taskwait_starting( - int gtid, - void * object -) { -#if USE_ITT_NOTIFY - kmp_info_t * thread = __kmp_thread_from_gtid( gtid ); - kmp_taskdata_t * taskdata = thread -> th.th_current_task; - ident_t const * loc = taskdata->td_taskwait_ident; - char const * src = ( loc == NULL? 
NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, "OMP Taskwait", src, 0 ); - KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object ); -#endif -} // __kmp_itt_taskwait_starting - -void -__kmp_itt_taskwait_finished( - int gtid, - void * object -) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_destroy( object ); - KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object ); -#endif -} // __kmp_itt_taskwait_finished - -/* - ------------------------------------------------------------------------------------------------ - Task reporting. - - Only those tasks are reported which are executed by a thread spinning at barrier (or taskwait). - Synch object passed to the function must be barrier of taskwait the threads waiting at. - ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_task_starting( - void * object // ITT sync object: barrier or taskwait. -) { -#if USE_ITT_NOTIFY - if ( object != NULL ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_cancel( object ); - KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object ); - }; // if -#endif -} // __kmp_itt_task_starting - -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_task_finished( - void * object // ITT sync object: barrier or taskwait. -) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object ); -#endif -} // __kmp_itt_task_finished - -// ------------------------------------------------------------------------------------------------- - -/* - ------------------------------------------------------------------------------------------------ - Lock reporting. - - * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation - (set/unset). It is not a real event shown to the user but just setting a name for - synchronization object. `lock' is an address of sync object, the same address should be - used in all subsequent calls. - - * __kmp_itt_lock_acquiring() should be called before setting the lock. - - * __kmp_itt_lock_acquired() should be called after setting the lock. - - * __kmp_itt_lock_realeasing() should be called before unsetting the lock. - - * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting for the lock. - - * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After - __kmp_itt_lock_destroyed() all the references to the same address will be considered - as another sync object, not related with the original one. - ------------------------------------------------------------------------------------------------ -*/ - -// ------------------------------------------------------------------------------------------------- - -#if KMP_USE_DYNAMIC_LOCK -// Takes location information directly -__kmp_inline -void -___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - char const * src = ( loc == NULL ? 
NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( lock, type, src, 0 ); - KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); - } -#endif -} -#else // KMP_USE_DYNAMIC_LOCK -// Internal guts -- common code for locks and critical sections, do not call directly. -__kmp_inline -void -___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - ident_t const * loc = NULL; - if ( __kmp_get_user_lock_location_ != NULL ) - loc = __kmp_get_user_lock_location_( (lock) ); - char const * src = ( loc == NULL ? NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( lock, type, src, 0 ); - KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); - }; // if -#endif -} // ___kmp_itt_lock_init -#endif // KMP_USE_DYNAMIC_LOCK - -// Internal guts -- common code for locks and critical sections, do not call directly. -__kmp_inline -void -___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_destroy( lock ); - KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock ); -#endif -} // ___kmp_itt_lock_fini - - -// ------------------------------------------------------------------------------------------------- - -#if KMP_USE_DYNAMIC_LOCK -void -__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) { - ___kmp_itt_lock_init( lock, "OMP Lock", loc ); -} -#else -void -__kmp_itt_lock_creating( kmp_user_lock_p lock ) { - ___kmp_itt_lock_init( lock, "OMP Lock" ); -} // __kmp_itt_lock_creating -#endif - -void -__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - // postpone lock object access - if ( __itt_sync_prepare_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_prepare( ilk->lock ); - } else { - __itt_sync_prepare( lock ); - } - } -#else - __itt_sync_prepare( lock ); -#endif -} // __kmp_itt_lock_acquiring - -void -__kmp_itt_lock_acquired( kmp_user_lock_p lock ) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - // postpone lock object access - if ( __itt_sync_acquired_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_acquired( ilk->lock ); - } else { - __itt_sync_acquired( lock ); - } - } -#else - __itt_sync_acquired( lock ); -#endif -} // __kmp_itt_lock_acquired - -void -__kmp_itt_lock_releasing( kmp_user_lock_p lock ) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - if ( __itt_sync_releasing_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_releasing( ilk->lock ); - } else { - __itt_sync_releasing( lock ); - } - } -#else - __itt_sync_releasing( lock ); -#endif -} // __kmp_itt_lock_releasing - -void -__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) { -#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - if ( __itt_sync_cancel_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_cancel( ilk->lock ); - } else { - __itt_sync_cancel( lock ); - } - } -#else - __itt_sync_cancel( lock ); -#endif -} // __kmp_itt_lock_cancelled - -void -__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) { - ___kmp_itt_lock_fini( lock, "OMP Lock" ); -} // __kmp_itt_lock_destroyed - -/* - ------------------------------------------------------------------------------------------------ - Critical reporting. 
- - Critical sections are treated exactly as locks (but have different object type). - ------------------------------------------------------------------------------------------------ -*/ -#if KMP_USE_DYNAMIC_LOCK -void -__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) { - ___kmp_itt_lock_init( lock, "OMP Critical", loc); -} -#else -void -__kmp_itt_critical_creating( kmp_user_lock_p lock ) { - ___kmp_itt_lock_init( lock, "OMP Critical" ); -} // __kmp_itt_critical_creating -#endif - -void -__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) { - __itt_sync_prepare( lock ); -} // __kmp_itt_critical_acquiring - -void -__kmp_itt_critical_acquired( kmp_user_lock_p lock ) { - __itt_sync_acquired( lock ); -} // __kmp_itt_critical_acquired - -void -__kmp_itt_critical_releasing( kmp_user_lock_p lock ) { - __itt_sync_releasing( lock ); -} // __kmp_itt_critical_releasing - -void -__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) { - ___kmp_itt_lock_fini( lock, "OMP Critical" ); -} // __kmp_itt_critical_destroyed - -/* - ------------------------------------------------------------------------------------------------ - Single reporting. - ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_single_start( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) { - kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) ); - ident_t * loc = thr->th.th_ident; - char const * src = ( loc == NULL ? NULL : loc->psource ); - kmp_str_buf_t name; - __kmp_str_buf_init( & name ); - __kmp_str_buf_print( & name, "OMP Single-%s", src ); - KMP_ITT_DEBUG_LOCK(); - thr->th.th_itt_mark_single = __itt_mark_create( name.str ); - KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single ); - __kmp_str_buf_free( & name ); - KMP_ITT_DEBUG_LOCK(); - __itt_mark( thr->th.th_itt_mark_single, NULL ); - KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single ); - }; // if -#endif -} // __kmp_itt_single_start - -void -__kmp_itt_single_end( int gtid ) { -#if USE_ITT_NOTIFY - __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single; - KMP_ITT_DEBUG_LOCK(); - __itt_mark_off( mark ); - KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark ); -#endif -} // __kmp_itt_single_end - -/* - ------------------------------------------------------------------------------------------------ - Ordered reporting. - - __kmp_itt_ordered_init is called by each thread *before* first using sync - object. ITT team would like it to be called once, but it requires extra synchronization. - - __kmp_itt_ordered_prep is called when thread is going to enter ordered section - (before synchronization). - - __kmp_itt_ordered_start is called just before entering user code (after - synchronization). - - __kmp_itt_ordered_end is called after returning from user code. - - Sync object is th->th.th_dispatch->th_dispatch_sh_current. - - Events are not generated in case of serialized team. - ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_ordered_init( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_info_t * thr = __kmp_thread_from_gtid( gtid ); - ident_t const * loc = thr->th.th_ident; - char const * src = ( loc == NULL ? 
NULL : loc->psource ); - __itt_sync_create( - thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0 - ); - }; // if -#endif -} // __kmp_itt_ordered_init - -void -__kmp_itt_ordered_prep( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if - }; // if -#endif -} // __kmp_itt_ordered_prep - -void -__kmp_itt_ordered_start( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if - }; // if -#endif -} // __kmp_itt_ordered_start - -void -__kmp_itt_ordered_end( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if - }; // if -#endif -} // __kmp_itt_ordered_end - - -/* - ------------------------------------------------------------------------------------------------ - Threads reporting. - ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_thread_ignore() { - __itt_thr_ignore(); -} // __kmp_itt_thread_ignore - -void -__kmp_itt_thread_name( int gtid ) { -#if USE_ITT_NOTIFY - if ( __itt_thr_name_set_ptr ) { - kmp_str_buf_t name; - __kmp_str_buf_init( & name ); - if( KMP_MASTER_GTID(gtid) ) { - __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid ); - } else { - __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid ); - } - KMP_ITT_DEBUG_LOCK(); - __itt_thr_name_set( name.str, name.used ); - KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str ); - __kmp_str_buf_free( & name ); - }; // if -#endif -} // __kmp_itt_thread_name - - -/* - -------------------------------------------------------------------------- - System object reporting. - - ITT catches operations with system sync objects (like Windows* OS on IA-32 - architecture API critical sections and events). We only need to specify - name ("OMP Scheduler") for the object to let ITT know it is an object used - by OpenMP RTL for internal purposes. - -------------------------------------------------------------------------- -*/ - -void -__kmp_itt_system_object_created( void * object, char const * name ) { -#if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, "OMP Scheduler", name, 0 ); - KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name ); -#endif -} // __kmp_itt_system_object_created - - -/* - ------------------------------------------------------------------------------------------------ - Stack stitching api. - - Master calls "create" and put the stitching id into team structure. - Workers read the stitching id and call "enter" / "leave" api. - Master calls "destroy" at the end of the parallel region. 
- ------------------------------------------------------------------------------------------------ -*/ - -__itt_caller -__kmp_itt_stack_caller_create() -{ -#if USE_ITT_NOTIFY - if ( !__itt_stack_caller_create_ptr ) - return NULL; - KMP_ITT_DEBUG_LOCK(); - __itt_caller id = __itt_stack_caller_create(); - KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id ); - return id; -#endif - return NULL; -} - -void -__kmp_itt_stack_caller_destroy( __itt_caller id ) -{ -#if USE_ITT_NOTIFY - if ( __itt_stack_caller_destroy_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_caller_destroy( id ); - KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id ); - } -#endif -} - -void -__kmp_itt_stack_callee_enter( __itt_caller id ) -{ -#if USE_ITT_NOTIFY - if ( __itt_stack_callee_enter_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_callee_enter( id ); - KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id ); - } -#endif -} - -void -__kmp_itt_stack_callee_leave( __itt_caller id ) -{ -#if USE_ITT_NOTIFY - if ( __itt_stack_callee_leave_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_callee_leave( id ); - KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id ); - } -#endif -} - -#endif /* USE_ITT_BUILD */ +#if USE_ITT_BUILD +/* + * kmp_itt.inl -- Inline functions of ITT Notify. + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +// Inline function definitions. This file should be included into kmp_itt.h file for prodiction +// build (to let compliler inline functions) or into kmp_itt.c file for debug build (to reduce +// the number of files to recompile and save build time). + + +#include "kmp.h" +#include "kmp_str.h" + +#if KMP_ITT_DEBUG + extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; + #define KMP_ITT_DEBUG_LOCK() { \ + __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \ + } + #define KMP_ITT_DEBUG_PRINT( ... ) { \ + fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \ + fprintf( stderr, __VA_ARGS__ ); \ + fflush( stderr ); \ + __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \ + } +#else + #define KMP_ITT_DEBUG_LOCK() + #define KMP_ITT_DEBUG_PRINT( ... ) +#endif // KMP_ITT_DEBUG + +// Ensure that the functions are static if they're supposed to be +// being inlined. Otherwise they cannot be used in more than one file, +// since there will be multiple definitions. +#if KMP_DEBUG +# define LINKAGE +#else +# define LINKAGE static inline +#endif + +// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this +// API to support user-defined synchronization primitives, but does not use ZCA; +// it would be safe to turn this off until wider support becomes available. +#if USE_ITT_ZCA +#ifdef __INTEL_COMPILER +# if __INTEL_COMPILER >= 1200 +# undef __itt_sync_acquired +# undef __itt_sync_releasing +# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr) +# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr) +# endif +#endif +#endif + +static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock ); + +/* + ------------------------------------------------------------------------------------------------ + Parallel region reporting. + + * __kmp_itt_region_forking should be called by master thread of a team. 
Exact moment of + call does not matter, but it should be completed before any thread of this team calls + __kmp_itt_region_starting. + * __kmp_itt_region_starting should be called by each thread of a team just before entering + parallel region body. + * __kmp_itt_region_finished should be called by each thread of a team right after returning + from parallel region body. + * __kmp_itt_region_joined should be called by master thread of a team, after all threads + called __kmp_itt_region_finished. + + Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can execute some more + user code -- such a thread can execute tasks. + + Note: The overhead of logging region_starting and region_finished in each thread is too large, + so these calls are not used. + + ------------------------------------------------------------------------------------------------ +*/ + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized ) { +#if USE_ITT_NOTIFY + kmp_team_t * team = __kmp_team_from_gtid( gtid ); + if (team->t.t_active_level + serialized > 1) + { + // The frame notifications are only supported for the outermost teams. + return; + } + ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; + if (loc) { + // Use the reserved_2 field to store the index to the region domain. + // Assume that reserved_2 contains zero initially. Since zero is special + // value here, store the index into domain array increased by 1. + if (loc->reserved_2 == 0) { + if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) { + // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc + //} // AC: this block is to replace next unsynchronized line + + // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 + // field but put region index to the low two bytes and barrier indexes to the high + // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. 
+ loc->reserved_2 |= (frm + 1); // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "<func>$omp$parallel@[file:]<line>[:<col>]" + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line, str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + if( barriers ) { + if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + const char * buff = NULL; + buff = __kmp_str_format("%s$omp$barrier@%s:%d", + str_loc.func, str_loc.file, str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + __kmp_str_free( &buff ); + // Save the barrier frame index to the high two bytes. + loc->reserved_2 |= (frm + 1) << 16; + } + } + __kmp_str_loc_free( &str_loc ); + __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL); + } + } else { // Region domain exists for this location + // Check if team size was changed. Then create new region domain for this location + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if( __kmp_itt_region_team_size[frm] != team_size ) { + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line, str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + __kmp_str_loc_free( &str_loc ); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } else { // Team size was not changed. Use existing domain. + __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, serialized:%d, loc:%p\n", + gtid, loc->reserved_2, serialized, loc ); + } +#endif +} // __kmp_itt_region_forking + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) { +#if USE_ITT_NOTIFY + if( region ) { + kmp_team_t * team = __kmp_team_from_gtid( gtid ); + int serialized = ( region == 2 ? 1 : 0 ); + if (team->t.t_active_level + serialized > 1) + { + // The frame notifications are only supported for the outermost teams. + return; + } + //Check region domain has not been created before. It's index is saved in the low two bytes. 
+ if ((loc->reserved_2 & 0x0000FFFF) == 0) { + if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + + // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 + // field but put region index to the low two bytes and barrier indexes to the high + // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. + loc->reserved_2 |= (frm + 1); // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]" + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line, str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + __kmp_str_loc_free( &str_loc ); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); + } + } else { // Region domain exists for this location + // Check if team size was changed. Then create new region domain for this location + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if( __kmp_itt_region_team_size[frm] != team_size ) { + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line, str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + __kmp_str_loc_free( &str_loc ); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); + } else { // Team size was not changed. Use existing domain. + __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n", + gtid, loc->reserved_2, region, loc, begin, end ); + return; + } else { // called for barrier reporting + if (loc) { + if ((loc->reserved_2 & 0xFFFF0000) == 0) { + if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + // Save the barrier frame index to the high two bytes. 
+ loc->reserved_2 |= (frm + 1) << 16; // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "<func>$omp$frame@[file:]<line>[:<col>]" + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + if( imbalance ) { + const char * buff_imb = NULL; + buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", + str_loc.func, team_size, str_loc.file, str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb ); + __itt_suppress_pop(); + __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end ); + __kmp_str_free( &buff_imb ); + } else { + const char * buff = NULL; + buff = __kmp_str_format("%s$omp$barrier@%s:%d", + str_loc.func, str_loc.file, str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end ); + __kmp_str_free( &buff ); + } + __kmp_str_loc_free( &str_loc ); + } + } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS + if( imbalance ) { + __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end ); + } else { + __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end ); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", + gtid, loc->reserved_2, loc, begin, end ); + } + } +#endif +} // __kmp_itt_frame_submit + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) { +#if USE_ITT_NOTIFY + if( metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock( & metadata_lock ); + if( metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = __itt_domain_create( "OMP Metadata" ); + __itt_suppress_pop(); + } + __kmp_release_bootstrap_lock( & metadata_lock ); + } + + __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_imbalance"); + + kmp_uint64 imbalance_data[ 4 ]; + imbalance_data[ 0 ] = begin; + imbalance_data[ 1 ] = end; + imbalance_data[ 2 ] = imbalance; + imbalance_data[ 3 ] = reduction; + + __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 4, imbalance_data); +#endif +} // __kmp_itt_metadata_imbalance + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) { +#if USE_ITT_NOTIFY + if( metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock( & metadata_lock ); + if( metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = __itt_domain_create( "OMP Metadata" ); + __itt_suppress_pop(); + } + __kmp_release_bootstrap_lock( & metadata_lock ); + } + + __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_loop"); + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + + kmp_uint64 loop_data[ 5 ]; + loop_data[ 0 ] = str_loc.line; + loop_data[ 1 ] = str_loc.col; + loop_data[ 2 ] = sched_type; + loop_data[ 3 ] = iterations; + loop_data[ 4 ] = chunk; + + 
__kmp_str_loc_free( &str_loc ); + + __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 5, loop_data); +#endif +} // __kmp_itt_metadata_loop + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_metadata_single( ident_t * loc ) { +#if USE_ITT_NOTIFY + if( metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock( & metadata_lock ); + if( metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = __itt_domain_create( "OMP Metadata" ); + __itt_suppress_pop(); + } + __kmp_release_bootstrap_lock( & metadata_lock ); + } + + __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_single"); + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + kmp_uint64 single_data[ 2 ]; + single_data[ 0 ] = str_loc.line; + single_data[ 1 ] = str_loc.col; + + __kmp_str_loc_free( &str_loc ); + + __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 2, single_data); +#endif +} // __kmp_itt_metadata_single + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_region_starting( int gtid ) { +#if USE_ITT_NOTIFY +#endif +} // __kmp_itt_region_starting + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_region_finished( int gtid ) { +#if USE_ITT_NOTIFY +#endif +} // __kmp_itt_region_finished + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void +__kmp_itt_region_joined( int gtid, int serialized ) { +#if USE_ITT_NOTIFY + kmp_team_t * team = __kmp_team_from_gtid( gtid ); + if (team->t.t_active_level + serialized > 1) + { + // The frame notifications are only supported for the outermost teams. + return; + } + ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; + if (loc && loc->reserved_2) + { + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if(frm < KMP_MAX_FRAME_DOMAINS) { + KMP_ITT_DEBUG_LOCK(); + __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL); + KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, serialized:%d, loc:%p\n", + gtid, loc->reserved_2, serialized, loc ); + } + } +#endif +} // __kmp_itt_region_joined + +/* + ------------------------------------------------------------------------------------------------ + Barriers reporting. + + A barrier consists of two phases: + + 1. Gather -- master waits for arriving of all the worker threads; each worker thread + registers arrival and goes further. + 2. Release -- each worker threads waits until master lets it go; master lets worker threads + go. + + Function should be called by each thread: + + * __kmp_itt_barrier_starting() -- before arriving to the gather phase. + * __kmp_itt_barrier_middle() -- between gather and release phases. + * __kmp_itt_barrier_finished() -- after release phase. + + Note: Call __kmp_itt_barrier_object() before call to __kmp_itt_barrier_starting() and save + result in local variable. __kmp_itt_barrier_object(), being called too late (e. g. after gather + phase) would return itt sync object for the next barrier! + + ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have + barrier object or barrier data structure. Barrier is just a counter in team and thread + structures. 
We could use the address of the team structure as a barrier sync object, but ITT wants
+ different objects for different barriers (even within the same team). So let us use the
+ team address as the barrier sync object for the first barrier, then increase it by one for the next
+ barrier, and so on (wrapping so that we never use addresses outside of the team structure).
+
+ ------------------------------------------------------------------------------------------------
+*/
+
+void *
+__kmp_itt_barrier_object(
+ int gtid,
+ int bt,
+ int set_name,
+ int delta // 0 (current barrier) is default value; specify -1 to get previous barrier.
+) {
+ void * object = NULL;
+#if USE_ITT_NOTIFY
+ kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
+ kmp_team_t * team = thr->th.th_team;
+
+ // NOTE:
+ // If the function is called from __kmp_fork_barrier, the team pointer can be NULL. This "if"
+ // helps to avoid a crash. However, this is not a complete solution, and reporting fork/join
+ // barriers to ITT should be revisited.
+
+ if ( team != NULL ) {
+
+ // The master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived
+ // by KMP_BARRIER_STATE_BUMP to get the plain barrier counter.
+ kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
+ // Now form the barrier id. Encode the barrier type (bt) in the barrier id too, so barriers of
+ // different types do not have the same ids.
+ KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier );
+ // This condition is a must (we would have a zero divide otherwise).
+ KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier );
+ // Stronger condition: make sure we have room for at least two different ids
+ // (for each barrier type).
+ object =
+ reinterpret_cast< void * >(
+ kmp_uintptr_t( team )
+ + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier
+ + bt
+ );
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object );
+
+ if ( set_name ) {
+ ident_t const * loc = NULL;
+ char const * src = NULL;
+ char const * type = "OMP Barrier";
+ switch ( bt ) {
+ case bs_plain_barrier : {
+ // For a plain barrier the compiler calls the __kmpc_barrier() function, which saves
+ // the location in thr->th.th_ident.
+ loc = thr->th.th_ident;
+ // Get the barrier type from the flags provided by the compiler.
+ kmp_int32 expl = 0;
+ kmp_uint32 impl = 0;
+ if ( loc != NULL ) {
+ src = loc->psource;
+ expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0;
+ impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0;
+ }; // if
+ if ( impl ) {
+ switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) {
+ case KMP_IDENT_BARRIER_IMPL_FOR : {
+ type = "OMP For Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_SECTIONS : {
+ type = "OMP Sections Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_SINGLE : {
+ type = "OMP Single Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_WORKSHARE : {
+ type = "OMP Workshare Barrier";
+ } break;
+ default : {
+ type = "OMP Implicit Barrier";
+ KMP_DEBUG_ASSERT( 0 );
+ };
+ }; /* switch */
+ } else if ( expl ) {
+ type = "OMP Explicit Barrier";
+ }; /* if */
+ } break;
+ case bs_forkjoin_barrier : {
+ // In case of the fork/join barrier we cannot rely on thr->th.th_ident, because it
+ // contains the location of the last passed construct (and the join barrier is not
+ // such a construct). Use th_ident of the master thread instead -- __kmp_join_call()
+ // called by the master thread saves the location.
+ //
+ // AC: cannot read from master because __kmp_join_call may not be called
+ // yet, so we read the location from the team. This is the same location.
+ // And the team is valid at the entry to the join barrier where this happens.
+ loc = team->t.t_ident;
+ if ( loc != NULL ) {
+ src = loc->psource;
+ }; // if
+ type = "OMP Join Barrier";
+ } break;
+ }; // switch
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, type, src, __itt_attr_barrier );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src );
+ }; // if
+
+ }; // if
+#endif
+ return object;
+} // __kmp_itt_barrier_object
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_starting( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( !KMP_MASTER_GTID( gtid ) ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_releasing( object );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object );
+ }; // if
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object );
+#endif
+} // __kmp_itt_barrier_starting
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_middle( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( KMP_MASTER_GTID( gtid ) ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_releasing( object );
+ KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object );
+ } else {
+ }; // if
+#endif
+} // __kmp_itt_barrier_middle
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_finished( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( KMP_MASTER_GTID( gtid ) ) {
+ } else {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object );
+ }; // if
+#endif
+} // __kmp_itt_barrier_finished
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Taskwait reporting.
+
+ ITT needs an address (void *) to be specified as a sync object. The OpenMP RTL does not have a
+ taskwait structure, so we need to construct something.
+
+*/
+
+void *
+__kmp_itt_taskwait_object( int gtid ) {
+ void * object = NULL;
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
+ kmp_taskdata_t * taskdata = thread -> th.th_current_task;
+ object =
+ reinterpret_cast< void * >(
+ kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t )
+ );
+ }; // if
+#endif
+ return object;
+} // __kmp_itt_taskwait_object
+
+void
+__kmp_itt_taskwait_starting(
+ int gtid,
+ void * object
+) {
+#if USE_ITT_NOTIFY
+ kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
+ kmp_taskdata_t * taskdata = thread -> th.th_current_task;
+ ident_t const * loc = taskdata->td_taskwait_ident;
+ char const * src = ( loc == NULL?
NULL : loc->psource );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, "OMP Taskwait", src, 0 );
+ KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object );
+#endif
+} // __kmp_itt_taskwait_starting
+
+void
+__kmp_itt_taskwait_finished(
+ int gtid,
+ void * object
+) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_destroy( object );
+ KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object );
+#endif
+} // __kmp_itt_taskwait_finished
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Task reporting.
+
+ Only those tasks are reported which are executed by a thread spinning at a barrier (or taskwait).
+ The sync object passed to the function must be the barrier or taskwait object the threads are waiting at.
+ ------------------------------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_task_starting(
+ void * object // ITT sync object: barrier or taskwait.
+) {
+#if USE_ITT_NOTIFY
+ if ( object != NULL ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_cancel( object );
+ KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object );
+ }; // if
+#endif
+} // __kmp_itt_task_starting
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_task_finished(
+ void * object // ITT sync object: barrier or taskwait.
+) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object );
+#endif
+} // __kmp_itt_task_finished
+
+// -------------------------------------------------------------------------------------------------
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Lock reporting.
+
+ * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation
+ (set/unset). It is not a real event shown to the user but just sets a name for the
+ synchronization object. `lock' is the address of the sync object; the same address should be
+ used in all subsequent calls.
+
+ * __kmp_itt_lock_acquiring() should be called before setting the lock.
+
+ * __kmp_itt_lock_acquired() should be called after setting the lock.
+
+ * __kmp_itt_lock_releasing() should be called before unsetting the lock.
+
+ * __kmp_itt_lock_cancelled() should be called after the thread has cancelled waiting for the lock.
+
+ * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After
+ __kmp_itt_lock_destroyed() all references to the same address will be considered
+ a different sync object, not related to the original one.
+ ------------------------------------------------------------------------------------------------
+*/
+
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_USE_DYNAMIC_LOCK
+// Takes location information directly
+__kmp_inline
+void
+___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ char const * src = ( loc == NULL ?
NULL : loc->psource ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create( lock, type, src, 0 ); + KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); + } +#endif +} +#else // KMP_USE_DYNAMIC_LOCK +// Internal guts -- common code for locks and critical sections, do not call directly. +__kmp_inline +void +___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) { +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + ident_t const * loc = NULL; + if ( __kmp_get_user_lock_location_ != NULL ) + loc = __kmp_get_user_lock_location_( (lock) ); + char const * src = ( loc == NULL ? NULL : loc->psource ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create( lock, type, src, 0 ); + KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); + }; // if +#endif +} // ___kmp_itt_lock_init +#endif // KMP_USE_DYNAMIC_LOCK + +// Internal guts -- common code for locks and critical sections, do not call directly. +__kmp_inline +void +___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) { +#if USE_ITT_NOTIFY + KMP_ITT_DEBUG_LOCK(); + __itt_sync_destroy( lock ); + KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock ); +#endif +} // ___kmp_itt_lock_fini + + +// ------------------------------------------------------------------------------------------------- + +#if KMP_USE_DYNAMIC_LOCK +void +__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) { + ___kmp_itt_lock_init( lock, "OMP Lock", loc ); +} +#else +void +__kmp_itt_lock_creating( kmp_user_lock_p lock ) { + ___kmp_itt_lock_init( lock, "OMP Lock" ); +} // __kmp_itt_lock_creating +#endif + +void +__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + // postpone lock object access + if ( __itt_sync_prepare_ptr ) { + if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_prepare( ilk->lock ); + } else { + __itt_sync_prepare( lock ); + } + } +#else + __itt_sync_prepare( lock ); +#endif +} // __kmp_itt_lock_acquiring + +void +__kmp_itt_lock_acquired( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + // postpone lock object access + if ( __itt_sync_acquired_ptr ) { + if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_acquired( ilk->lock ); + } else { + __itt_sync_acquired( lock ); + } + } +#else + __itt_sync_acquired( lock ); +#endif +} // __kmp_itt_lock_acquired + +void +__kmp_itt_lock_releasing( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + if ( __itt_sync_releasing_ptr ) { + if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_releasing( ilk->lock ); + } else { + __itt_sync_releasing( lock ); + } + } +#else + __itt_sync_releasing( lock ); +#endif +} // __kmp_itt_lock_releasing + +void +__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + if ( __itt_sync_cancel_ptr ) { + if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_cancel( ilk->lock ); + } else { + __itt_sync_cancel( lock ); + } + } +#else + __itt_sync_cancel( lock ); +#endif +} // __kmp_itt_lock_cancelled + +void +__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) { + ___kmp_itt_lock_fini( lock, "OMP Lock" ); +} // __kmp_itt_lock_destroyed + +/* + ------------------------------------------------------------------------------------------------ + Critical reporting. 
+
+ Critical sections are treated exactly as locks (but have a different object type).
+ ------------------------------------------------------------------------------------------------
+*/
+#if KMP_USE_DYNAMIC_LOCK
+void
+__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) {
+ ___kmp_itt_lock_init( lock, "OMP Critical", loc);
+}
+#else
+void
+__kmp_itt_critical_creating( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_init( lock, "OMP Critical" );
+} // __kmp_itt_critical_creating
+#endif
+
+void
+__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) {
+ __itt_sync_prepare( lock );
+} // __kmp_itt_critical_acquiring
+
+void
+__kmp_itt_critical_acquired( kmp_user_lock_p lock ) {
+ __itt_sync_acquired( lock );
+} // __kmp_itt_critical_acquired
+
+void
+__kmp_itt_critical_releasing( kmp_user_lock_p lock ) {
+ __itt_sync_releasing( lock );
+} // __kmp_itt_critical_releasing
+
+void
+__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_fini( lock, "OMP Critical" );
+} // __kmp_itt_critical_destroyed
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Single reporting.
+ ------------------------------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_single_start( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) {
+ kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) );
+ ident_t * loc = thr->th.th_ident;
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ kmp_str_buf_t name;
+ __kmp_str_buf_init( & name );
+ __kmp_str_buf_print( & name, "OMP Single-%s", src );
+ KMP_ITT_DEBUG_LOCK();
+ thr->th.th_itt_mark_single = __itt_mark_create( name.str );
+ KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single );
+ __kmp_str_buf_free( & name );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_mark( thr->th.th_itt_mark_single, NULL );
+ KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single );
+ }; // if
+#endif
+} // __kmp_itt_single_start
+
+void
+__kmp_itt_single_end( int gtid ) {
+#if USE_ITT_NOTIFY
+ __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single;
+ KMP_ITT_DEBUG_LOCK();
+ __itt_mark_off( mark );
+ KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark );
+#endif
+} // __kmp_itt_single_end
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Ordered reporting.
+
+ __kmp_itt_ordered_init is called by each thread *before* first using the sync
+ object. The ITT team would like it to be called once, but that requires extra synchronization.
+
+ __kmp_itt_ordered_prep is called when a thread is about to enter an ordered section
+ (before synchronization).
+
+ __kmp_itt_ordered_start is called just before entering user code (after
+ synchronization).
+
+ __kmp_itt_ordered_end is called after returning from user code.
+
+ The sync object is th->th.th_dispatch->th_dispatch_sh_current.
+
+ Events are not generated in the case of a serialized team.
+ ------------------------------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_ordered_init( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
+ ident_t const * loc = thr->th.th_ident;
+ char const * src = ( loc == NULL ?
NULL : loc->psource );
+ __itt_sync_create(
+ thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0
+ );
+ }; // if
+#endif
+} // __kmp_itt_ordered_init
+
+void
+__kmp_itt_ordered_prep( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_prep
+
+void
+__kmp_itt_ordered_start( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_start
+
+void
+__kmp_itt_ordered_end( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_end
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Threads reporting.
+ ------------------------------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_thread_ignore() {
+ __itt_thr_ignore();
+} // __kmp_itt_thread_ignore
+
+void
+__kmp_itt_thread_name( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_thr_name_set_ptr ) {
+ kmp_str_buf_t name;
+ __kmp_str_buf_init( & name );
+ if( KMP_MASTER_GTID(gtid) ) {
+ __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid );
+ } else {
+ __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid );
+ }
+ KMP_ITT_DEBUG_LOCK();
+ __itt_thr_name_set( name.str, name.used );
+ KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str );
+ __kmp_str_buf_free( & name );
+ }; // if
+#endif
+} // __kmp_itt_thread_name
+
+
+/*
+ --------------------------------------------------------------------------
+ System object reporting.
+
+ ITT catches operations with system sync objects (like Windows* OS on IA-32
+ architecture API critical sections and events). We only need to specify a
+ name ("OMP Scheduler") for the object to let ITT know it is an object used
+ by the OpenMP RTL for internal purposes.
+ --------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_system_object_created( void * object, char const * name ) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, "OMP Scheduler", name, 0 );
+ KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name );
+#endif
+} // __kmp_itt_system_object_created
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Stack stitching API.
+
+ The master calls "create" and puts the stitching id into the team structure.
+ Workers read the stitching id and call the "enter" / "leave" API.
+ The master calls "destroy" at the end of the parallel region.
+ ------------------------------------------------------------------------------------------------
+*/
+
+__itt_caller
+__kmp_itt_stack_caller_create()
+{
+#if USE_ITT_NOTIFY
+ if ( !__itt_stack_caller_create_ptr )
+ return NULL;
+ KMP_ITT_DEBUG_LOCK();
+ __itt_caller id = __itt_stack_caller_create();
+ KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id );
+ return id;
+#endif
+ return NULL;
+}
+
+void
+__kmp_itt_stack_caller_destroy( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_caller_destroy_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_caller_destroy( id );
+ KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id );
+ }
+#endif
+}
+
+void
+__kmp_itt_stack_callee_enter( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_callee_enter_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_callee_enter( id );
+ KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id );
+ }
+#endif
+}
+
+void
+__kmp_itt_stack_callee_leave( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_callee_leave_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_callee_leave( id );
+ KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id );
+ }
+#endif
+}
+
+#endif /* USE_ITT_BUILD */
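For reference, the barrier reporting protocol documented in the comments above reduces to a fixed call order around the two barrier phases. The sketch below only illustrates that order; it is not part of this file, and __kmp_gather_phase() / __kmp_release_phase() are hypothetical stand-ins for the runtime's real gather and release code. The ITT reporting calls and their signatures are the ones defined in the diff above.

    static void barrier_reporting_sketch( int gtid, int bt ) {
        // Obtain the sync object before the gather phase; asking for it later
        // (e.g. after the gather phase) would return the object of the next barrier.
        void * itt_sync_obj = __kmp_itt_barrier_object( gtid, bt, /* set_name = */ 1, /* delta = */ 0 );
        __kmp_itt_barrier_starting( gtid, itt_sync_obj );  // before arriving at the gather phase
        __kmp_gather_phase( gtid, bt );                    // hypothetical stand-in: wait for all threads to arrive
        __kmp_itt_barrier_middle( gtid, itt_sync_obj );    // between the gather and release phases
        __kmp_release_phase( gtid, bt );                   // hypothetical stand-in: master releases the workers
        __kmp_itt_barrier_finished( gtid, itt_sync_obj );  // after the release phase
    }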