path: root/contrib/libs/cxxsupp/openmp/kmp_itt.inl
author    Anton Samokhvalov <pg83@yandex.ru>    2022-02-10 16:45:15 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru>    2022-02-10 16:45:15 +0300
commit    72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
tree      da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /contrib/libs/cxxsupp/openmp/kmp_itt.inl
parent    778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
download  ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/kmp_itt.inl')
-rw-r--r--    contrib/libs/cxxsupp/openmp/kmp_itt.inl    2260
1 file changed, 1130 insertions, 1130 deletions
diff --git a/contrib/libs/cxxsupp/openmp/kmp_itt.inl b/contrib/libs/cxxsupp/openmp/kmp_itt.inl
index 6dafa6c16e..625d879840 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_itt.inl
+++ b/contrib/libs/cxxsupp/openmp/kmp_itt.inl
@@ -1,1130 +1,1130 @@
-#if USE_ITT_BUILD
-/*
- * kmp_itt.inl -- Inline functions of ITT Notify.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-// Inline function definitions. This file should be included into the kmp_itt.h file for a production
-// build (to let the compiler inline functions) or into the kmp_itt.c file for a debug build (to reduce
-// the number of files to recompile and save build time).
-
-
-#include "kmp.h"
-#include "kmp_str.h"
-
-#if KMP_ITT_DEBUG
- extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
- #define KMP_ITT_DEBUG_LOCK() { \
- __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \
- }
- #define KMP_ITT_DEBUG_PRINT( ... ) { \
- fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \
- fprintf( stderr, __VA_ARGS__ ); \
- fflush( stderr ); \
- __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \
- }
-#else
- #define KMP_ITT_DEBUG_LOCK()
- #define KMP_ITT_DEBUG_PRINT( ... )
-#endif // KMP_ITT_DEBUG
-
-// Ensure that the functions are static if they're supposed to be
-// inlined. Otherwise they cannot be used in more than one file,
-// since there will be multiple definitions.
-#if KMP_DEBUG
-# define LINKAGE
-#else
-# define LINKAGE static inline
-#endif
-
-// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this
-// API to support user-defined synchronization primitives, but does not use ZCA;
-// it would be safe to turn this off until wider support becomes available.
-#if USE_ITT_ZCA
-#ifdef __INTEL_COMPILER
-# if __INTEL_COMPILER >= 1200
-# undef __itt_sync_acquired
-# undef __itt_sync_releasing
-# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr)
-# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr)
-# endif
-#endif
-#endif
-
-static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock );
-
-/*
- ------------------------------------------------------------------------------------------------
- Parallel region reporting.
-
-    * __kmp_itt_region_forking should be called by the master thread of a team. The exact moment of
-      the call does not matter, but it should be completed before any thread of this team calls
-      __kmp_itt_region_starting.
-    * __kmp_itt_region_starting should be called by each thread of a team just before entering
-      the parallel region body.
-    * __kmp_itt_region_finished should be called by each thread of a team right after returning
-      from the parallel region body.
-    * __kmp_itt_region_joined should be called by the master thread of a team, after all threads
-      have called __kmp_itt_region_finished.
-
- Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can execute some more
- user code -- such a thread can execute tasks.
-
- Note: The overhead of logging region_starting and region_finished in each thread is too large,
- so these calls are not used.
-
- ------------------------------------------------------------------------------------------------
-*/
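For orientation, a minimal sketch of the expected call order is shown below. It is illustrative only: it assumes the declarations from kmp_itt.h, and the example_* wrapper name is hypothetical.

    // Hypothetical sketch -- not part of this file.
    void example_region_reporting(int gtid, int team_size, int barriers, int serialized) {
        // Master thread, before the workers start executing the region body:
        __kmp_itt_region_forking(gtid, team_size, barriers, serialized);
        // Each team thread would call __kmp_itt_region_starting(gtid) just before the body
        // and __kmp_itt_region_finished(gtid) right after it, but (per the note above)
        // these two calls are not used because of their per-thread overhead.
        // Master thread, after all threads have finished the region body:
        __kmp_itt_region_joined(gtid, serialized);
    }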
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized ) {
-#if USE_ITT_NOTIFY
- kmp_team_t * team = __kmp_team_from_gtid( gtid );
- if (team->t.t_active_level + serialized > 1)
- {
- // The frame notifications are only supported for the outermost teams.
- return;
- }
- ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
- if (loc) {
-        // Use the reserved_2 field to store the index to the region domain.
-        // Assume that reserved_2 contains zero initially. Since zero is a special
-        // value here, store the index into the domain array increased by 1.
- if (loc->reserved_2 == 0) {
- if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count
- return; // loc->reserved_2 is still 0
- }
- //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
- // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc
- //} // AC: this block is to replace next unsynchronized line
-
- // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2
- // field but put region index to the low two bytes and barrier indexes to the high
- // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512.
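-                // For example, region slot frm == 3 together with barrier slot frm == 7 gives
-                // reserved_2 == (3 + 1) | ((7 + 1) << 16) == 0x00080004; the region index is
-                // recovered later as (loc->reserved_2 & 0x0000FFFF) - 1 and the barrier index
-                // as (loc->reserved_2 >> 16) - 1.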
- loc->reserved_2 |= (frm + 1); // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$parallel@[file:]<line>[:<col>]"
- const char * buff = NULL;
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.line, str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
-
- __kmp_str_free( &buff );
- if( barriers ) {
- if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count
- return; // loc->reserved_2 is still 0
- }
- const char * buff = NULL;
- buff = __kmp_str_format("%s$omp$barrier@%s:%d",
- str_loc.func, str_loc.file, str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
- __kmp_str_free( &buff );
- // Save the barrier frame index to the high two bytes.
- loc->reserved_2 |= (frm + 1) << 16;
- }
- }
- __kmp_str_loc_free( &str_loc );
- __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL);
- }
- } else { // Region domain exists for this location
- // Check if team size was changed. Then create new region domain for this location
- int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if( __kmp_itt_region_team_size[frm] != team_size ) {
- const char * buff = NULL;
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.line, str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
-
- __kmp_str_free( &buff );
- __kmp_str_loc_free( &str_loc );
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
- } else { // Team size was not changed. Use existing domain.
- __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
- }
- }
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, serialized:%d, loc:%p\n",
- gtid, loc->reserved_2, serialized, loc );
- }
-#endif
-} // __kmp_itt_region_forking
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) {
-#if USE_ITT_NOTIFY
- if( region ) {
- kmp_team_t * team = __kmp_team_from_gtid( gtid );
- int serialized = ( region == 2 ? 1 : 0 );
- if (team->t.t_active_level + serialized > 1)
- {
- // The frame notifications are only supported for the outermost teams.
- return;
- }
-        //Check the region domain has not been created before. Its index is saved in the low two bytes.
- if ((loc->reserved_2 & 0x0000FFFF) == 0) {
- if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count
- return; // loc->reserved_2 is still 0
- }
-
- // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2
- // field but put region index to the low two bytes and barrier indexes to the high
- // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512.
- loc->reserved_2 |= (frm + 1); // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
- const char * buff = NULL;
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.line, str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
-
- __kmp_str_free( &buff );
- __kmp_str_loc_free( &str_loc );
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
- }
- } else { // Region domain exists for this location
- // Check if team size was changed. Then create new region domain for this location
- int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if( __kmp_itt_region_team_size[frm] != team_size ) {
- const char * buff = NULL;
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.line, str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
-
- __kmp_str_free( &buff );
- __kmp_str_loc_free( &str_loc );
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
- } else { // Team size was not changed. Use existing domain.
- __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
- }
- }
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
- gtid, loc->reserved_2, region, loc, begin, end );
- return;
- } else { // called for barrier reporting
- if (loc) {
- if ((loc->reserved_2 & 0xFFFF0000) == 0) {
- if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count
- return; // loc->reserved_2 is still 0
- }
- // Save the barrier frame index to the high two bytes.
- loc->reserved_2 |= (frm + 1) << 16; // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$frame@[file:]<line>[:<col>]"
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- if( imbalance ) {
- const char * buff_imb = NULL;
- buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
- str_loc.func, team_size, str_loc.file, str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb );
- __itt_suppress_pop();
- __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end );
- __kmp_str_free( &buff_imb );
- } else {
- const char * buff = NULL;
- buff = __kmp_str_format("%s$omp$barrier@%s:%d",
- str_loc.func, str_loc.file, str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
- __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end );
- __kmp_str_free( &buff );
- }
- __kmp_str_loc_free( &str_loc );
- }
- } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
- if( imbalance ) {
- __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end );
- } else {
- __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end );
- }
- }
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n",
- gtid, loc->reserved_2, loc, begin, end );
- }
- }
-#endif
-} // __kmp_itt_frame_submit
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) {
-#if USE_ITT_NOTIFY
- if( metadata_domain == NULL) {
- __kmp_acquire_bootstrap_lock( & metadata_lock );
- if( metadata_domain == NULL) {
- __itt_suppress_push(__itt_suppress_memory_errors);
- metadata_domain = __itt_domain_create( "OMP Metadata" );
- __itt_suppress_pop();
- }
- __kmp_release_bootstrap_lock( & metadata_lock );
- }
-
- __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_imbalance");
-
- kmp_uint64 imbalance_data[ 4 ];
- imbalance_data[ 0 ] = begin;
- imbalance_data[ 1 ] = end;
- imbalance_data[ 2 ] = imbalance;
- imbalance_data[ 3 ] = reduction;
-
- __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 4, imbalance_data);
-#endif
-} // __kmp_itt_metadata_imbalance
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) {
-#if USE_ITT_NOTIFY
- if( metadata_domain == NULL) {
- __kmp_acquire_bootstrap_lock( & metadata_lock );
- if( metadata_domain == NULL) {
- __itt_suppress_push(__itt_suppress_memory_errors);
- metadata_domain = __itt_domain_create( "OMP Metadata" );
- __itt_suppress_pop();
- }
- __kmp_release_bootstrap_lock( & metadata_lock );
- }
-
- __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_loop");
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
-
- kmp_uint64 loop_data[ 5 ];
- loop_data[ 0 ] = str_loc.line;
- loop_data[ 1 ] = str_loc.col;
- loop_data[ 2 ] = sched_type;
- loop_data[ 3 ] = iterations;
- loop_data[ 4 ] = chunk;
-
- __kmp_str_loc_free( &str_loc );
-
- __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 5, loop_data);
-#endif
-} // __kmp_itt_metadata_loop
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_metadata_single( ident_t * loc ) {
-#if USE_ITT_NOTIFY
- if( metadata_domain == NULL) {
- __kmp_acquire_bootstrap_lock( & metadata_lock );
- if( metadata_domain == NULL) {
- __itt_suppress_push(__itt_suppress_memory_errors);
- metadata_domain = __itt_domain_create( "OMP Metadata" );
- __itt_suppress_pop();
- }
- __kmp_release_bootstrap_lock( & metadata_lock );
- }
-
- __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_single");
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- kmp_uint64 single_data[ 2 ];
- single_data[ 0 ] = str_loc.line;
- single_data[ 1 ] = str_loc.col;
-
- __kmp_str_loc_free( &str_loc );
-
- __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 2, single_data);
-#endif
-} // __kmp_itt_metadata_single
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_region_starting( int gtid ) {
-#if USE_ITT_NOTIFY
-#endif
-} // __kmp_itt_region_starting
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_region_finished( int gtid ) {
-#if USE_ITT_NOTIFY
-#endif
-} // __kmp_itt_region_finished
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_region_joined( int gtid, int serialized ) {
-#if USE_ITT_NOTIFY
- kmp_team_t * team = __kmp_team_from_gtid( gtid );
- if (team->t.t_active_level + serialized > 1)
- {
- // The frame notifications are only supported for the outermost teams.
- return;
- }
- ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
- if (loc && loc->reserved_2)
- {
- int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if(frm < KMP_MAX_FRAME_DOMAINS) {
- KMP_ITT_DEBUG_LOCK();
- __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
- KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, serialized:%d, loc:%p\n",
- gtid, loc->reserved_2, serialized, loc );
- }
- }
-#endif
-} // __kmp_itt_region_joined
-
-/*
- ------------------------------------------------------------------------------------------------
- Barriers reporting.
-
- A barrier consists of two phases:
-
-    1. Gather -- the master waits for all the worker threads to arrive; each worker thread
-       registers its arrival and goes further.
-    2. Release -- each worker thread waits until the master lets it go; the master lets the worker threads
-       go.
-
-    The following functions should be called by each thread:
-
- * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
- * __kmp_itt_barrier_middle() -- between gather and release phases.
- * __kmp_itt_barrier_finished() -- after release phase.
-
-    Note: Call __kmp_itt_barrier_object() before the call to __kmp_itt_barrier_starting() and save the
-    result in a local variable. If __kmp_itt_barrier_object() is called too late (e.g. after the gather
-    phase), it would return the ITT sync object for the next barrier!
-
-    ITT needs an address (void *) to be specified as a sync object. The OpenMP RTL does not have
-    a barrier object or barrier data structure; a barrier is just a counter in the team and thread
-    structures. We could use the address of the team structure as a barrier sync object, but ITT wants
-    different objects for different barriers (even within the same team). So let us use the
-    team address as the sync object for the first barrier, then increase it by one for the next
-    barrier, and so on (but wrap it so as not to use addresses outside of the team structure).
-
- ------------------------------------------------------------------------------------------------
-*/
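A minimal usage sketch, assuming the declarations from kmp_itt.h; the example_* wrapper and the phase comments are hypothetical.

    // Hypothetical sketch -- not part of this file.
    void example_barrier_reporting(int gtid, int bt) {
        // Get the sync object first; calling __kmp_itt_barrier_object() later
        // (e.g. after the gather phase) would name the next barrier instead.
        void * object = __kmp_itt_barrier_object(gtid, bt, /* set_name = */ 1, /* delta = */ 0);
        __kmp_itt_barrier_starting(gtid, object);   // before arriving at the gather phase
        /* ... gather phase ... */
        __kmp_itt_barrier_middle(gtid, object);     // between the gather and release phases
        /* ... release phase ... */
        __kmp_itt_barrier_finished(gtid, object);   // after the release phase
    }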
-
-void *
-__kmp_itt_barrier_object(
- int gtid,
- int bt,
- int set_name,
- int delta // 0 (current barrier) is default value; specify -1 to get previous barrier.
-) {
- void * object = NULL;
-#if USE_ITT_NOTIFY
- kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
- kmp_team_t * team = thr->th.th_team;
-
- // NOTE:
-    // If the function is called from __kmp_fork_barrier, the team pointer can be NULL. This "if"
-    // helps to avoid a crash. However, this is not a complete solution, and reporting fork/join
-    // barriers to ITT should be revisited.
-
- if ( team != NULL ) {
-
- // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived
- // by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
- kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
- // Now form the barrier id. Encode barrier type (bt) in barrier id too, so barriers of
- // different types do not have the same ids.
- KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier );
-        // This condition is a must (we would have a divide by zero otherwise).
- KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier );
-        // A stronger condition: make sure we have room for at least two different ids
-        // (for each barrier type).
- object =
- reinterpret_cast< void * >(
- kmp_uintptr_t( team )
- + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier
- + bt
- );
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object );
-
- if ( set_name ) {
- ident_t const * loc = NULL;
- char const * src = NULL;
- char const * type = "OMP Barrier";
- switch ( bt ) {
- case bs_plain_barrier : {
- // For plain barrier compiler calls __kmpc_barrier() function, which saves
- // location in thr->th.th_ident.
- loc = thr->th.th_ident;
- // Get the barrier type from flags provided by compiler.
- kmp_int32 expl = 0;
- kmp_uint32 impl = 0;
- if ( loc != NULL ) {
- src = loc->psource;
- expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0;
- impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0;
- }; // if
- if ( impl ) {
- switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) {
- case KMP_IDENT_BARRIER_IMPL_FOR : {
- type = "OMP For Barrier";
- } break;
- case KMP_IDENT_BARRIER_IMPL_SECTIONS : {
- type = "OMP Sections Barrier";
- } break;
- case KMP_IDENT_BARRIER_IMPL_SINGLE : {
- type = "OMP Single Barrier";
- } break;
- case KMP_IDENT_BARRIER_IMPL_WORKSHARE : {
- type = "OMP Workshare Barrier";
- } break;
- default : {
- type = "OMP Implicit Barrier";
- KMP_DEBUG_ASSERT( 0 );
- };
- }; /* switch */
- } else if ( expl ) {
- type = "OMP Explicit Barrier";
- }; /* if */
- } break;
- case bs_forkjoin_barrier : {
-                    // In case of the fork/join barrier we cannot read thr->th.th_ident, because it
-                    // contains the location of the last passed construct (and the join barrier is not
-                    // such a construct). Use th_ident of the master thread instead -- __kmp_join_call()
- // called by the master thread saves location.
- //
- // AC: cannot read from master because __kmp_join_call may be not called
- // yet, so we read the location from team. This is the same location.
- // And team is valid at the enter to join barrier where this happens.
- loc = team->t.t_ident;
- if ( loc != NULL ) {
- src = loc->psource;
- }; // if
- type = "OMP Join Barrier";
- } break;
- }; // switch
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( object, type, src, __itt_attr_barrier );
- KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src );
- }; // if
-
- }; // if
-#endif
- return object;
-} // __kmp_itt_barrier_object
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_itt_barrier_starting( int gtid, void * object ) {
-#if USE_ITT_NOTIFY
- if ( !KMP_MASTER_GTID( gtid ) ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_releasing( object );
- KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object );
- }; // if
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_prepare( object );
- KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object );
-#endif
-} // __kmp_itt_barrier_starting
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_itt_barrier_middle( int gtid, void * object ) {
-#if USE_ITT_NOTIFY
- if ( KMP_MASTER_GTID( gtid ) ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_acquired( object );
- KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_releasing( object );
- KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object );
- } else {
- }; // if
-#endif
-} // __kmp_itt_barrier_middle
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_itt_barrier_finished( int gtid, void * object ) {
-#if USE_ITT_NOTIFY
- if ( KMP_MASTER_GTID( gtid ) ) {
- } else {
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_acquired( object );
- KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object );
- }; // if
-#endif
-} // __kmp_itt_barrier_finished
-
-/*
- ------------------------------------------------------------------------------------------------
- Taskwait reporting.
-
-    ITT needs an address (void *) to be specified as a sync object. The OpenMP RTL does not have a taskwait
-    structure, so we need to construct something.
-
-*/
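A minimal sketch of how these three calls are meant to bracket a taskwait (illustrative only; the example_* name is hypothetical).

    // Hypothetical sketch -- not part of this file.
    void example_taskwait_reporting(int gtid) {
        void * object = __kmp_itt_taskwait_object(gtid);   // NULL unless ITT sync tracing is active
        if (object != NULL) {
            __kmp_itt_taskwait_starting(gtid, object);
            /* ... wait for child tasks to complete ... */
            __kmp_itt_taskwait_finished(gtid, object);
        }
    }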
-
-void *
-__kmp_itt_taskwait_object( int gtid ) {
- void * object = NULL;
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
- kmp_taskdata_t * taskdata = thread -> th.th_current_task;
- object =
- reinterpret_cast< void * >(
- kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t )
- );
- }; // if
-#endif
- return object;
-} // __kmp_itt_taskwait_object
-
-void
-__kmp_itt_taskwait_starting(
- int gtid,
- void * object
-) {
-#if USE_ITT_NOTIFY
- kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
- kmp_taskdata_t * taskdata = thread -> th.th_current_task;
- ident_t const * loc = taskdata->td_taskwait_ident;
- char const * src = ( loc == NULL? NULL : loc->psource );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( object, "OMP Taskwait", src, 0 );
- KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_prepare( object );
- KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object );
-#endif
-} // __kmp_itt_taskwait_starting
-
-void
-__kmp_itt_taskwait_finished(
- int gtid,
- void * object
-) {
-#if USE_ITT_NOTIFY
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_acquired( object );
- KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_destroy( object );
- KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object );
-#endif
-} // __kmp_itt_taskwait_finished
-
-/*
- ------------------------------------------------------------------------------------------------
- Task reporting.
-
-    Only those tasks are reported which are executed by a thread spinning at a barrier (or taskwait).
-    The sync object passed to the function must be the barrier or taskwait the thread is waiting at.
- ------------------------------------------------------------------------------------------------
-*/
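A minimal sketch (illustrative only; the example_* name is hypothetical): the sync object is the barrier or taskwait object the thread is spinning at.

    // Hypothetical sketch -- not part of this file.
    void example_task_reporting(void * sync_object) {
        __kmp_itt_task_starting(sync_object);   // cancels the wait on the sync object
        /* ... execute the task picked up while spinning ... */
        __kmp_itt_task_finished(sync_object);   // re-arms the wait (sync_prepare) on the sync object
    }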
-
-void
-__kmp_itt_task_starting(
- void * object // ITT sync object: barrier or taskwait.
-) {
-#if USE_ITT_NOTIFY
- if ( object != NULL ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_cancel( object );
- KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object );
- }; // if
-#endif
-} // __kmp_itt_task_starting
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_itt_task_finished(
- void * object // ITT sync object: barrier or taskwait.
-) {
-#if USE_ITT_NOTIFY
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_prepare( object );
- KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object );
-#endif
-} // __kmp_itt_task_finished
-
-// -------------------------------------------------------------------------------------------------
-
-/*
- ------------------------------------------------------------------------------------------------
- Lock reporting.
-
-    * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation
-      (set/unset). It is not a real event shown to the user, just setting a name for the
-      synchronization object. `lock' is the address of the sync object; the same address should be
-      used in all subsequent calls.
-
- * __kmp_itt_lock_acquiring() should be called before setting the lock.
-
- * __kmp_itt_lock_acquired() should be called after setting the lock.
-
-    * __kmp_itt_lock_releasing() should be called before unsetting the lock.
-
-    * __kmp_itt_lock_cancelled() should be called after a thread has cancelled waiting for the lock.
-
-    * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After
-      __kmp_itt_lock_destroyed() all references to the same address will be considered
-      a different sync object, not related to the original one.
- ------------------------------------------------------------------------------------------------
-*/
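A minimal sketch of the expected call order around one set/unset pair (illustrative only; the example_* name is hypothetical, and the naming/destruction calls from the list above are left out).

    // Hypothetical sketch -- not part of this file.
    void example_lock_reporting(kmp_user_lock_p lock) {
        __kmp_itt_lock_acquiring(lock);     // about to set the lock
        /* ... set the lock; on a cancelled wait call __kmp_itt_lock_cancelled(lock) instead ... */
        __kmp_itt_lock_acquired(lock);      // the lock is now held
        /* ... work under the lock ... */
        __kmp_itt_lock_releasing(lock);     // about to unset the lock
        /* ... unset the lock ... */
    }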
-
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_USE_DYNAMIC_LOCK
-// Takes location information directly
-__kmp_inline
-void
-___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- char const * src = ( loc == NULL ? NULL : loc->psource );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( lock, type, src, 0 );
- KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
- }
-#endif
-}
-#else // KMP_USE_DYNAMIC_LOCK
-// Internal guts -- common code for locks and critical sections, do not call directly.
-__kmp_inline
-void
-___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- ident_t const * loc = NULL;
- if ( __kmp_get_user_lock_location_ != NULL )
- loc = __kmp_get_user_lock_location_( (lock) );
- char const * src = ( loc == NULL ? NULL : loc->psource );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( lock, type, src, 0 );
- KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
- }; // if
-#endif
-} // ___kmp_itt_lock_init
-#endif // KMP_USE_DYNAMIC_LOCK
-
-// Internal guts -- common code for locks and critical sections, do not call directly.
-__kmp_inline
-void
-___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) {
-#if USE_ITT_NOTIFY
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_destroy( lock );
- KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock );
-#endif
-} // ___kmp_itt_lock_fini
-
-
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_USE_DYNAMIC_LOCK
-void
-__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) {
- ___kmp_itt_lock_init( lock, "OMP Lock", loc );
-}
-#else
-void
-__kmp_itt_lock_creating( kmp_user_lock_p lock ) {
- ___kmp_itt_lock_init( lock, "OMP Lock" );
-} // __kmp_itt_lock_creating
-#endif
-
-void
-__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) {
-#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
- // postpone lock object access
- if ( __itt_sync_prepare_ptr ) {
- if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __itt_sync_prepare( ilk->lock );
- } else {
- __itt_sync_prepare( lock );
- }
- }
-#else
- __itt_sync_prepare( lock );
-#endif
-} // __kmp_itt_lock_acquiring
-
-void
-__kmp_itt_lock_acquired( kmp_user_lock_p lock ) {
-#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
- // postpone lock object access
- if ( __itt_sync_acquired_ptr ) {
- if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __itt_sync_acquired( ilk->lock );
- } else {
- __itt_sync_acquired( lock );
- }
- }
-#else
- __itt_sync_acquired( lock );
-#endif
-} // __kmp_itt_lock_acquired
-
-void
-__kmp_itt_lock_releasing( kmp_user_lock_p lock ) {
-#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
- if ( __itt_sync_releasing_ptr ) {
- if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __itt_sync_releasing( ilk->lock );
- } else {
- __itt_sync_releasing( lock );
- }
- }
-#else
- __itt_sync_releasing( lock );
-#endif
-} // __kmp_itt_lock_releasing
-
-void
-__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) {
-#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
- if ( __itt_sync_cancel_ptr ) {
- if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __itt_sync_cancel( ilk->lock );
- } else {
- __itt_sync_cancel( lock );
- }
- }
-#else
- __itt_sync_cancel( lock );
-#endif
-} // __kmp_itt_lock_cancelled
-
-void
-__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) {
- ___kmp_itt_lock_fini( lock, "OMP Lock" );
-} // __kmp_itt_lock_destroyed
-
-/*
- ------------------------------------------------------------------------------------------------
- Critical reporting.
-
- Critical sections are treated exactly as locks (but have different object type).
- ------------------------------------------------------------------------------------------------
-*/
-#if KMP_USE_DYNAMIC_LOCK
-void
-__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) {
- ___kmp_itt_lock_init( lock, "OMP Critical", loc);
-}
-#else
-void
-__kmp_itt_critical_creating( kmp_user_lock_p lock ) {
- ___kmp_itt_lock_init( lock, "OMP Critical" );
-} // __kmp_itt_critical_creating
-#endif
-
-void
-__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) {
- __itt_sync_prepare( lock );
-} // __kmp_itt_critical_acquiring
-
-void
-__kmp_itt_critical_acquired( kmp_user_lock_p lock ) {
- __itt_sync_acquired( lock );
-} // __kmp_itt_critical_acquired
-
-void
-__kmp_itt_critical_releasing( kmp_user_lock_p lock ) {
- __itt_sync_releasing( lock );
-} // __kmp_itt_critical_releasing
-
-void
-__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) {
- ___kmp_itt_lock_fini( lock, "OMP Critical" );
-} // __kmp_itt_critical_destroyed
-
-/*
- ------------------------------------------------------------------------------------------------
- Single reporting.
- ------------------------------------------------------------------------------------------------
-*/
-
-void
-__kmp_itt_single_start( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) {
- kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) );
- ident_t * loc = thr->th.th_ident;
- char const * src = ( loc == NULL ? NULL : loc->psource );
- kmp_str_buf_t name;
- __kmp_str_buf_init( & name );
- __kmp_str_buf_print( & name, "OMP Single-%s", src );
- KMP_ITT_DEBUG_LOCK();
- thr->th.th_itt_mark_single = __itt_mark_create( name.str );
- KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single );
- __kmp_str_buf_free( & name );
- KMP_ITT_DEBUG_LOCK();
- __itt_mark( thr->th.th_itt_mark_single, NULL );
- KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single );
- }; // if
-#endif
-} // __kmp_itt_single_start
-
-void
-__kmp_itt_single_end( int gtid ) {
-#if USE_ITT_NOTIFY
- __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single;
- KMP_ITT_DEBUG_LOCK();
- __itt_mark_off( mark );
- KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark );
-#endif
-} // __kmp_itt_single_end
-
-/*
- ------------------------------------------------------------------------------------------------
- Ordered reporting.
-
-    __kmp_itt_ordered_init is called by each thread *before* first using the sync
-    object. The ITT team would like it to be called once, but that requires extra synchronization.
-
-    __kmp_itt_ordered_prep is called when a thread is going to enter an ordered section
-    (before synchronization).
-
- __kmp_itt_ordered_start is called just before entering user code (after
- synchronization).
-
- __kmp_itt_ordered_end is called after returning from user code.
-
- Sync object is th->th.th_dispatch->th_dispatch_sh_current.
-
- Events are not generated in case of serialized team.
- ------------------------------------------------------------------------------------------------
-*/
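A minimal sketch of the per-thread call order (illustrative only; the example_* name is hypothetical).

    // Hypothetical sketch -- not part of this file.
    void example_ordered_reporting(int gtid) {
        __kmp_itt_ordered_init(gtid);    // before the first use of the sync object
        __kmp_itt_ordered_prep(gtid);    // about to wait for our turn
        /* ... wait until preceding iterations leave the ordered section ... */
        __kmp_itt_ordered_start(gtid);   // just before the user code of the ordered section
        /* ... user code ... */
        __kmp_itt_ordered_end(gtid);     // right after the user code
    }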
-
-void
-__kmp_itt_ordered_init( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
- ident_t const * loc = thr->th.th_ident;
- char const * src = ( loc == NULL ? NULL : loc->psource );
- __itt_sync_create(
- thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0
- );
- }; // if
-#endif
-} // __kmp_itt_ordered_init
-
-void
-__kmp_itt_ordered_prep( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_team_t * t = __kmp_team_from_gtid( gtid );
- if ( ! t->t.t_serialized ) {
- kmp_info_t * th = __kmp_thread_from_gtid( gtid );
- __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current );
- }; // if
- }; // if
-#endif
-} // __kmp_itt_ordered_prep
-
-void
-__kmp_itt_ordered_start( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_team_t * t = __kmp_team_from_gtid( gtid );
- if ( ! t->t.t_serialized ) {
- kmp_info_t * th = __kmp_thread_from_gtid( gtid );
- __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current );
- }; // if
- }; // if
-#endif
-} // __kmp_itt_ordered_start
-
-void
-__kmp_itt_ordered_end( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_team_t * t = __kmp_team_from_gtid( gtid );
- if ( ! t->t.t_serialized ) {
- kmp_info_t * th = __kmp_thread_from_gtid( gtid );
- __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current );
- }; // if
- }; // if
-#endif
-} // __kmp_itt_ordered_end
-
-
-/*
- ------------------------------------------------------------------------------------------------
- Threads reporting.
- ------------------------------------------------------------------------------------------------
-*/
-
-void
-__kmp_itt_thread_ignore() {
- __itt_thr_ignore();
-} // __kmp_itt_thread_ignore
-
-void
-__kmp_itt_thread_name( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_thr_name_set_ptr ) {
- kmp_str_buf_t name;
- __kmp_str_buf_init( & name );
- if( KMP_MASTER_GTID(gtid) ) {
- __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid );
- } else {
- __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid );
- }
- KMP_ITT_DEBUG_LOCK();
- __itt_thr_name_set( name.str, name.used );
- KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str );
- __kmp_str_buf_free( & name );
- }; // if
-#endif
-} // __kmp_itt_thread_name
-
-
-/*
- --------------------------------------------------------------------------
- System object reporting.
-
-    ITT catches operations with system sync objects (like Windows* OS on IA-32
-    architecture API critical sections and events). We only need to specify a
-    name ("OMP Scheduler") for the object to let ITT know it is an object used
-    by the OpenMP RTL for internal purposes.
- --------------------------------------------------------------------------
-*/
-
-void
-__kmp_itt_system_object_created( void * object, char const * name ) {
-#if USE_ITT_NOTIFY
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( object, "OMP Scheduler", name, 0 );
- KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name );
-#endif
-} // __kmp_itt_system_object_created
-
-
-/*
- ------------------------------------------------------------------------------------------------
- Stack stitching api.
-
-    The master calls "create" and puts the stitching id into the team structure.
-    Workers read the stitching id and call the "enter" / "leave" API.
-    The master calls "destroy" at the end of the parallel region.
- ------------------------------------------------------------------------------------------------
-*/
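A minimal sketch of the flow (illustrative only; in the runtime the id travels from the master to the workers through the team structure, shown here as a plain argument, and the example_* names are hypothetical).

    // Hypothetical sketch -- not part of this file.
    __itt_caller example_stitch_fork() {            // master, at fork
        return __kmp_itt_stack_caller_create();     // the runtime stores the returned id in the team
    }
    void example_stitch_body(__itt_caller id) {     // each worker (and the master)
        __kmp_itt_stack_callee_enter(id);
        /* ... outlined parallel region body ... */
        __kmp_itt_stack_callee_leave(id);
    }
    void example_stitch_join(__itt_caller id) {     // master, at the end of the parallel region
        __kmp_itt_stack_caller_destroy(id);
    }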
-
-__itt_caller
-__kmp_itt_stack_caller_create()
-{
-#if USE_ITT_NOTIFY
- if ( !__itt_stack_caller_create_ptr )
- return NULL;
- KMP_ITT_DEBUG_LOCK();
- __itt_caller id = __itt_stack_caller_create();
- KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id );
- return id;
-#endif
- return NULL;
-}
-
-void
-__kmp_itt_stack_caller_destroy( __itt_caller id )
-{
-#if USE_ITT_NOTIFY
- if ( __itt_stack_caller_destroy_ptr ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_stack_caller_destroy( id );
- KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id );
- }
-#endif
-}
-
-void
-__kmp_itt_stack_callee_enter( __itt_caller id )
-{
-#if USE_ITT_NOTIFY
- if ( __itt_stack_callee_enter_ptr ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_stack_callee_enter( id );
- KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id );
- }
-#endif
-}
-
-void
-__kmp_itt_stack_callee_leave( __itt_caller id )
-{
-#if USE_ITT_NOTIFY
- if ( __itt_stack_callee_leave_ptr ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_stack_callee_leave( id );
- KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id );
- }
-#endif
-}
-
-#endif /* USE_ITT_BUILD */
+#if USE_ITT_BUILD
+/*
+ * kmp_itt.inl -- Inline functions of ITT Notify.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Inline function definitions. This file should be included into the kmp_itt.h file for a production
+// build (to let the compiler inline functions) or into the kmp_itt.c file for a debug build (to reduce
+// the number of files to recompile and save build time).
+
+
+#include "kmp.h"
+#include "kmp_str.h"
+
+#if KMP_ITT_DEBUG
+ extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
+ #define KMP_ITT_DEBUG_LOCK() { \
+ __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \
+ }
+ #define KMP_ITT_DEBUG_PRINT( ... ) { \
+ fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \
+ fprintf( stderr, __VA_ARGS__ ); \
+ fflush( stderr ); \
+ __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \
+ }
+#else
+ #define KMP_ITT_DEBUG_LOCK()
+ #define KMP_ITT_DEBUG_PRINT( ... )
+#endif // KMP_ITT_DEBUG
+
+// Ensure that the functions are static if they're supposed to be
+// inlined. Otherwise they cannot be used in more than one file,
+// since there will be multiple definitions.
+#if KMP_DEBUG
+# define LINKAGE
+#else
+# define LINKAGE static inline
+#endif
+
+// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this
+// API to support user-defined synchronization primitives, but does not use ZCA;
+// it would be safe to turn this off until wider support becomes available.
+#if USE_ITT_ZCA
+#ifdef __INTEL_COMPILER
+# if __INTEL_COMPILER >= 1200
+# undef __itt_sync_acquired
+# undef __itt_sync_releasing
+# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr)
+# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr)
+# endif
+#endif
+#endif
+
+static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock );
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Parallel region reporting.
+
+    * __kmp_itt_region_forking should be called by the master thread of a team. The exact moment of
+      the call does not matter, but it should be completed before any thread of this team calls
+      __kmp_itt_region_starting.
+    * __kmp_itt_region_starting should be called by each thread of a team just before entering
+      the parallel region body.
+    * __kmp_itt_region_finished should be called by each thread of a team right after returning
+      from the parallel region body.
+    * __kmp_itt_region_joined should be called by the master thread of a team, after all threads
+      have called __kmp_itt_region_finished.
+
+ Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can execute some more
+ user code -- such a thread can execute tasks.
+
+ Note: The overhead of logging region_starting and region_finished in each thread is too large,
+ so these calls are not used.
+
+ ------------------------------------------------------------------------------------------------
+*/
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized ) {
+#if USE_ITT_NOTIFY
+ kmp_team_t * team = __kmp_team_from_gtid( gtid );
+ if (team->t.t_active_level + serialized > 1)
+ {
+ // The frame notifications are only supported for the outermost teams.
+ return;
+ }
+ ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
+ if (loc) {
+        // Use the reserved_2 field to store the index to the region domain.
+        // Assume that reserved_2 contains zero initially. Since zero is a special
+        // value here, store the index into the domain array increased by 1.
+ if (loc->reserved_2 == 0) {
+ if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+ //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
+ // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc
+ //} // AC: this block is to replace next unsynchronized line
+
+ // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2
+ // field but put region index to the low two bytes and barrier indexes to the high
+ // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512.
+ loc->reserved_2 |= (frm + 1); // save "new" value
+
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$parallel@[file:]<line>[:<col>]"
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
+ str_loc.func, team_size, str_loc.file,
+ str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ if( barriers ) {
+ if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+ const char * buff = NULL;
+ buff = __kmp_str_format("%s$omp$barrier@%s:%d",
+ str_loc.func, str_loc.file, str_loc.col);
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+ __kmp_str_free( &buff );
+ // Save the barrier frame index to the high two bytes.
+ loc->reserved_2 |= (frm + 1) << 16;
+ }
+ }
+ __kmp_str_loc_free( &str_loc );
+ __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL);
+ }
+ } else { // Region domain exists for this location
+ // Check if team size was changed. Then create new region domain for this location
+ int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
+ if( __kmp_itt_region_team_size[frm] != team_size ) {
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
+ str_loc.func, team_size, str_loc.file,
+ str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ __kmp_str_loc_free( &str_loc );
+ __kmp_itt_region_team_size[frm] = team_size;
+ __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
+ } else { // Team size was not changed. Use existing domain.
+ __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
+ }
+ }
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, serialized:%d, loc:%p\n",
+ gtid, loc->reserved_2, serialized, loc );
+ }
+#endif
+} // __kmp_itt_region_forking
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) {
+#if USE_ITT_NOTIFY
+ if( region ) {
+ kmp_team_t * team = __kmp_team_from_gtid( gtid );
+ int serialized = ( region == 2 ? 1 : 0 );
+ if (team->t.t_active_level + serialized > 1)
+ {
+ // The frame notifications are only supported for the outermost teams.
+ return;
+ }
+        //Check the region domain has not been created before. Its index is saved in the low two bytes.
+ if ((loc->reserved_2 & 0x0000FFFF) == 0) {
+ if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+
+ // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2
+ // field but put region index to the low two bytes and barrier indexes to the high
+ // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512.
+ loc->reserved_2 |= (frm + 1); // save "new" value
+
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
+ str_loc.func, team_size, str_loc.file,
+ str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ __kmp_str_loc_free( &str_loc );
+ __kmp_itt_region_team_size[frm] = team_size;
+ __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
+ }
+ } else { // Region domain exists for this location
+ // Check if team size was changed. Then create new region domain for this location
+ int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
+ if( __kmp_itt_region_team_size[frm] != team_size ) {
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
+ str_loc.func, team_size, str_loc.file,
+ str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ __kmp_str_loc_free( &str_loc );
+ __kmp_itt_region_team_size[frm] = team_size;
+ __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
+ } else { // Team size was not changed. Use existing domain.
+ __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
+ }
+ }
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
+ gtid, loc->reserved_2, region, loc, begin, end );
+ return;
+ } else { // called for barrier reporting
+ if (loc) {
+ if ((loc->reserved_2 & 0xFFFF0000) == 0) {
+ if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+ // Save the barrier frame index to the high two bytes.
+ loc->reserved_2 |= (frm + 1) << 16; // save "new" value
+
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$frame@[file:]<line>[:<col>]"
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ if( imbalance ) {
+ const char * buff_imb = NULL;
+ buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
+ str_loc.func, team_size, str_loc.file, str_loc.col);
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb );
+ __itt_suppress_pop();
+ __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end );
+ __kmp_str_free( &buff_imb );
+ } else {
+ const char * buff = NULL;
+ buff = __kmp_str_format("%s$omp$barrier@%s:%d",
+ str_loc.func, str_loc.file, str_loc.col);
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+ __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end );
+ __kmp_str_free( &buff );
+ }
+ __kmp_str_loc_free( &str_loc );
+ }
+ } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
+ if( imbalance ) {
+ __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end );
+ } else {
+ __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end );
+ }
+ }
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n",
+ gtid, loc->reserved_2, loc, begin, end );
+ }
+ }
+#endif
+} // __kmp_itt_frame_submit
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) {
+#if USE_ITT_NOTIFY
+ if( metadata_domain == NULL) {
+ __kmp_acquire_bootstrap_lock( & metadata_lock );
+ if( metadata_domain == NULL) {
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ metadata_domain = __itt_domain_create( "OMP Metadata" );
+ __itt_suppress_pop();
+ }
+ __kmp_release_bootstrap_lock( & metadata_lock );
+ }
+
+ __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_imbalance");
+
+ kmp_uint64 imbalance_data[ 4 ];
+ imbalance_data[ 0 ] = begin;
+ imbalance_data[ 1 ] = end;
+ imbalance_data[ 2 ] = imbalance;
+ imbalance_data[ 3 ] = reduction;
+
+ __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 4, imbalance_data);
+#endif
+} // __kmp_itt_metadata_imbalance
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) {
+#if USE_ITT_NOTIFY
+ if( metadata_domain == NULL) {
+ __kmp_acquire_bootstrap_lock( & metadata_lock );
+ if( metadata_domain == NULL) {
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ metadata_domain = __itt_domain_create( "OMP Metadata" );
+ __itt_suppress_pop();
+ }
+ __kmp_release_bootstrap_lock( & metadata_lock );
+ }
+
+ __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_loop");
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+
+ kmp_uint64 loop_data[ 5 ];
+ loop_data[ 0 ] = str_loc.line;
+ loop_data[ 1 ] = str_loc.col;
+ loop_data[ 2 ] = sched_type;
+ loop_data[ 3 ] = iterations;
+ loop_data[ 4 ] = chunk;
+
+ __kmp_str_loc_free( &str_loc );
+
+ __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 5, loop_data);
+#endif
+} // __kmp_itt_metadata_loop
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_metadata_single( ident_t * loc ) {
+#if USE_ITT_NOTIFY
+ if( metadata_domain == NULL) {
+ __kmp_acquire_bootstrap_lock( & metadata_lock );
+ if( metadata_domain == NULL) {
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ metadata_domain = __itt_domain_create( "OMP Metadata" );
+ __itt_suppress_pop();
+ }
+ __kmp_release_bootstrap_lock( & metadata_lock );
+ }
+
+ __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_single");
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ kmp_uint64 single_data[ 2 ];
+ single_data[ 0 ] = str_loc.line;
+ single_data[ 1 ] = str_loc.col;
+
+ __kmp_str_loc_free( &str_loc );
+
+ __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 2, single_data);
+#endif
+} // __kmp_itt_metadata_single
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_region_starting( int gtid ) {
+#if USE_ITT_NOTIFY
+#endif
+} // __kmp_itt_region_starting
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_region_finished( int gtid ) {
+#if USE_ITT_NOTIFY
+#endif
+} // __kmp_itt_region_finished
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_region_joined( int gtid, int serialized ) {
+#if USE_ITT_NOTIFY
+ kmp_team_t * team = __kmp_team_from_gtid( gtid );
+ if (team->t.t_active_level + serialized > 1)
+ {
+ // The frame notifications are only supported for the outermost teams.
+ return;
+ }
+ ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
+ if (loc && loc->reserved_2)
+ {
+ int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
+ if(frm < KMP_MAX_FRAME_DOMAINS) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
+ KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, serialized:%d, loc:%p\n",
+ gtid, loc->reserved_2, serialized, loc );
+ }
+ }
+#endif
+} // __kmp_itt_region_joined
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Barriers reporting.
+
+ A barrier consists of two phases:
+
+    1. Gather -- the master waits for all worker threads to arrive; each worker thread
+       registers its arrival and goes further.
+    2. Release -- each worker thread waits until the master lets it go; the master lets the
+       worker threads go.
+
+    The following functions should be called by each thread:
+
+    * __kmp_itt_barrier_starting() -- before arriving at the gather phase.
+    * __kmp_itt_barrier_middle()   -- between the gather and release phases.
+    * __kmp_itt_barrier_finished() -- after the release phase.
+
+    Note: Call __kmp_itt_barrier_object() before the call to __kmp_itt_barrier_starting() and save
+    the result in a local variable. If __kmp_itt_barrier_object() is called too late (e.g. after the
+    gather phase), it would return the ITT sync object for the next barrier!
+
+    ITT needs an address (void *) to be specified as a sync object. The OpenMP RTL does not have
+    a barrier object or barrier data structure. A barrier is just a counter in the team and thread
+    structures. We could use the address of the team structure as the barrier sync object, but ITT
+    wants different objects for different barriers (even within the same team). So let us use the
+    team address as the sync object for the first barrier, then increase it by one for the next
+    barrier, and so on (but wrap around so we never use addresses outside of the team structure).
+
+    An illustrative call-order sketch follows this comment.
+
+ ------------------------------------------------------------------------------------------------
+*/
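+
+/*
+    Illustrative sketch (comment only, not compiled): the per-thread call order around one
+    barrier, using the functions above; bs_plain_barrier is used as an example barrier type.
+
+        void * itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_plain_barrier, 1, 0 );
+        __kmp_itt_barrier_starting( gtid, itt_sync_obj );   // before the gather phase
+        // ... gather: register arrival / wait for all workers ...
+        __kmp_itt_barrier_middle( gtid, itt_sync_obj );     // between gather and release
+        // ... release: wait to be let go / let the workers go ...
+        __kmp_itt_barrier_finished( gtid, itt_sync_obj );   // after the release phase
+
+    The object must be obtained up front; obtaining it after the gather phase may already name
+    the next barrier (see the note above).
+*/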
+
+void *
+__kmp_itt_barrier_object(
+ int gtid,
+ int bt,
+ int set_name,
+    int delta                  // 0 (current barrier) is the default value; specify -1 to get the previous barrier.
+) {
+ void * object = NULL;
+#if USE_ITT_NOTIFY
+ kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
+ kmp_team_t * team = thr->th.th_team;
+
+    // NOTE:
+    // If the function is called from __kmp_fork_barrier, the team pointer can be NULL. This "if"
+    // helps to avoid a crash. However, this is not a complete solution, and reporting fork/join
+    // barriers to ITT should be revisited.
+
+ if ( team != NULL ) {
+
+        // The master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived
+        // by KMP_BARRIER_STATE_BUMP to get the plain barrier counter.
+ kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
+ // Now form the barrier id. Encode barrier type (bt) in barrier id too, so barriers of
+ // different types do not have the same ids.
+ KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier );
+        // This condition is a must (we would have a zero divide otherwise).
+ KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier );
+        // Stronger condition: make sure we have room for at least two different ids
+        // (for each barrier type).
+ object =
+ reinterpret_cast< void * >(
+ kmp_uintptr_t( team )
+ + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier
+ + bt
+ );
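+        // Worked example (illustrative): with bs_last_barrier == 2, barrier #0 of type bt maps to
+        // team+bt (team+0 or team+1), barrier #1 maps to team+2+bt, and so on; the counter wraps
+        // after sizeof(kmp_team_t)/2 barriers, so the object always stays inside the team structure.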
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object );
+
+ if ( set_name ) {
+ ident_t const * loc = NULL;
+ char const * src = NULL;
+ char const * type = "OMP Barrier";
+ switch ( bt ) {
+ case bs_plain_barrier : {
+                    // For a plain barrier the compiler calls the __kmpc_barrier() function, which
+                    // saves the location in thr->th.th_ident.
+ loc = thr->th.th_ident;
+                    // Get the barrier type from the flags provided by the compiler.
+ kmp_int32 expl = 0;
+ kmp_uint32 impl = 0;
+ if ( loc != NULL ) {
+ src = loc->psource;
+ expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0;
+ impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0;
+ }; // if
+ if ( impl ) {
+ switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) {
+ case KMP_IDENT_BARRIER_IMPL_FOR : {
+ type = "OMP For Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_SECTIONS : {
+ type = "OMP Sections Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_SINGLE : {
+ type = "OMP Single Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_WORKSHARE : {
+ type = "OMP Workshare Barrier";
+ } break;
+ default : {
+ type = "OMP Implicit Barrier";
+ KMP_DEBUG_ASSERT( 0 );
+ };
+ }; /* switch */
+ } else if ( expl ) {
+ type = "OMP Explicit Barrier";
+ }; /* if */
+ } break;
+ case bs_forkjoin_barrier : {
+                    // In case of the fork/join barrier we cannot read thr->th.th_ident, because it
+                    // contains the location of the last passed construct (and the join barrier is
+                    // not such a construct). Use th_ident of the master thread instead --
+                    // __kmp_join_call(), called by the master thread, saves the location.
+                    //
+                    // AC: cannot read from the master because __kmp_join_call may not be called
+                    // yet, so we read the location from the team. This is the same location.
+                    // And the team is valid at the entry to the join barrier where this happens.
+ loc = team->t.t_ident;
+ if ( loc != NULL ) {
+ src = loc->psource;
+ }; // if
+ type = "OMP Join Barrier";
+ } break;
+ }; // switch
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, type, src, __itt_attr_barrier );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src );
+ }; // if
+
+ }; // if
+#endif
+ return object;
+} // __kmp_itt_barrier_object
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_starting( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( !KMP_MASTER_GTID( gtid ) ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_releasing( object );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object );
+ }; // if
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object );
+#endif
+} // __kmp_itt_barrier_starting
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_middle( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( KMP_MASTER_GTID( gtid ) ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_releasing( object );
+ KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object );
+ } else {
+ }; // if
+#endif
+} // __kmp_itt_barrier_middle
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_finished( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( KMP_MASTER_GTID( gtid ) ) {
+ } else {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object );
+ }; // if
+#endif
+} // __kmp_itt_barrier_finished
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Taskwait reporting.
+
+    ITT needs an address (void *) to be specified as a sync object. The OpenMP RTL does not have a
+    taskwait structure, so we need to construct something. A call-order sketch follows this comment.
+
+*/
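+
+/*
+    Illustrative sketch (comment only, not compiled): the expected per-thread call order around a
+    taskwait; the actual caller lives in the tasking code.
+
+        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
+        if ( itt_sync_obj != NULL )
+            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+        // ... execute queued tasks until all child tasks of the current task complete ...
+        if ( itt_sync_obj != NULL )
+            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+*/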
+
+void *
+__kmp_itt_taskwait_object( int gtid ) {
+ void * object = NULL;
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
+ kmp_taskdata_t * taskdata = thread -> th.th_current_task;
+ object =
+ reinterpret_cast< void * >(
+ kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t )
+ );
+ }; // if
+#endif
+ return object;
+} // __kmp_itt_taskwait_object
+
+void
+__kmp_itt_taskwait_starting(
+ int gtid,
+ void * object
+) {
+#if USE_ITT_NOTIFY
+ kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
+ kmp_taskdata_t * taskdata = thread -> th.th_current_task;
+ ident_t const * loc = taskdata->td_taskwait_ident;
+ char const * src = ( loc == NULL? NULL : loc->psource );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, "OMP Taskwait", src, 0 );
+ KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object );
+#endif
+} // __kmp_itt_taskwait_starting
+
+void
+__kmp_itt_taskwait_finished(
+ int gtid,
+ void * object
+) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_destroy( object );
+ KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object );
+#endif
+} // __kmp_itt_taskwait_finished
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Task reporting.
+
+    Only those tasks are reported which are executed by a thread spinning at a barrier (or taskwait).
+    The sync object passed to the function must be the barrier or taskwait the thread is waiting at.
+ ------------------------------------------------------------------------------------------------
+*/
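+
+/*
+    Illustrative sketch (comment only, not compiled): a thread spinning at a barrier (or taskwait)
+    brackets each task it executes with these calls, passing the sync object of the construct it
+    is waiting at.
+
+        __kmp_itt_task_starting( itt_sync_obj );   // cancel the wait on the sync object
+        // ... invoke the task routine ...
+        __kmp_itt_task_finished( itt_sync_obj );   // resume waiting (sync prepare again)
+*/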
+
+void
+__kmp_itt_task_starting(
+ void * object // ITT sync object: barrier or taskwait.
+) {
+#if USE_ITT_NOTIFY
+ if ( object != NULL ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_cancel( object );
+ KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object );
+ }; // if
+#endif
+} // __kmp_itt_task_starting
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_task_finished(
+ void * object // ITT sync object: barrier or taskwait.
+) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object );
+#endif
+} // __kmp_itt_task_finished
+
+// -------------------------------------------------------------------------------------------------
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Lock reporting.
+
+ * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation
+      (set/unset). It is not a real event shown to the user but just sets a name for the
+      synchronization object. `lock' is the address of the sync object; the same address should be
+      used in all subsequent calls.
+
+ * __kmp_itt_lock_acquiring() should be called before setting the lock.
+
+ * __kmp_itt_lock_acquired() should be called after setting the lock.
+
+    * __kmp_itt_lock_releasing() should be called before unsetting the lock.
+
+    * __kmp_itt_lock_cancelled() should be called after the thread has cancelled waiting for the lock.
+
+    * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After
+      __kmp_itt_lock_destroyed() all references to the same address will be considered
+      another sync object, not related to the original one. (An illustrative call order is
+      sketched after this comment.)
+ ------------------------------------------------------------------------------------------------
+*/
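+
+/*
+    Illustrative sketch (comment only, not compiled): the expected call order around a user lock
+    `lck'; with KMP_USE_DYNAMIC_LOCK defined, __kmp_itt_lock_creating() additionally takes the
+    ident_t * location.
+
+        __kmp_itt_lock_creating( lck );    // name the sync object before the first operation
+        __kmp_itt_lock_acquiring( lck );   // about to set the lock
+        // ... acquire the lock (or __kmp_itt_lock_cancelled( lck ) if the wait is abandoned) ...
+        __kmp_itt_lock_acquired( lck );    // the lock is set
+        // ... critical work ...
+        __kmp_itt_lock_releasing( lck );   // about to unset the lock
+        __kmp_itt_lock_destroyed( lck );   // after the last operation on this lock
+*/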
+
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_USE_DYNAMIC_LOCK
+// Takes location information directly
+__kmp_inline
+void
+___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( lock, type, src, 0 );
+ KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
+ }
+#endif
+}
+#else // KMP_USE_DYNAMIC_LOCK
+// Internal guts -- common code for locks and critical sections, do not call directly.
+__kmp_inline
+void
+___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ ident_t const * loc = NULL;
+ if ( __kmp_get_user_lock_location_ != NULL )
+ loc = __kmp_get_user_lock_location_( (lock) );
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( lock, type, src, 0 );
+ KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
+ }; // if
+#endif
+} // ___kmp_itt_lock_init
+#endif // KMP_USE_DYNAMIC_LOCK
+
+// Internal guts -- common code for locks and critical sections, do not call directly.
+__kmp_inline
+void
+___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_destroy( lock );
+ KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock );
+#endif
+} // ___kmp_itt_lock_fini
+
+
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_USE_DYNAMIC_LOCK
+void
+__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) {
+ ___kmp_itt_lock_init( lock, "OMP Lock", loc );
+}
+#else
+void
+__kmp_itt_lock_creating( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_init( lock, "OMP Lock" );
+} // __kmp_itt_lock_creating
+#endif
+
+void
+__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+ // postpone lock object access
+ if ( __itt_sync_prepare_ptr ) {
+ if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __itt_sync_prepare( ilk->lock );
+ } else {
+ __itt_sync_prepare( lock );
+ }
+ }
+#else
+ __itt_sync_prepare( lock );
+#endif
+} // __kmp_itt_lock_acquiring
+
+void
+__kmp_itt_lock_acquired( kmp_user_lock_p lock ) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+ // postpone lock object access
+ if ( __itt_sync_acquired_ptr ) {
+ if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __itt_sync_acquired( ilk->lock );
+ } else {
+ __itt_sync_acquired( lock );
+ }
+ }
+#else
+ __itt_sync_acquired( lock );
+#endif
+} // __kmp_itt_lock_acquired
+
+void
+__kmp_itt_lock_releasing( kmp_user_lock_p lock ) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+ if ( __itt_sync_releasing_ptr ) {
+ if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __itt_sync_releasing( ilk->lock );
+ } else {
+ __itt_sync_releasing( lock );
+ }
+ }
+#else
+ __itt_sync_releasing( lock );
+#endif
+} // __kmp_itt_lock_releasing
+
+void
+__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+ if ( __itt_sync_cancel_ptr ) {
+ if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __itt_sync_cancel( ilk->lock );
+ } else {
+ __itt_sync_cancel( lock );
+ }
+ }
+#else
+ __itt_sync_cancel( lock );
+#endif
+} // __kmp_itt_lock_cancelled
+
+void
+__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_fini( lock, "OMP Lock" );
+} // __kmp_itt_lock_destroyed
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Critical reporting.
+
+    Critical sections are treated exactly as locks (but have a different object type).
+ ------------------------------------------------------------------------------------------------
+*/
+#if KMP_USE_DYNAMIC_LOCK
+void
+__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) {
+ ___kmp_itt_lock_init( lock, "OMP Critical", loc);
+}
+#else
+void
+__kmp_itt_critical_creating( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_init( lock, "OMP Critical" );
+} // __kmp_itt_critical_creating
+#endif
+
+void
+__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) {
+ __itt_sync_prepare( lock );
+} // __kmp_itt_critical_acquiring
+
+void
+__kmp_itt_critical_acquired( kmp_user_lock_p lock ) {
+ __itt_sync_acquired( lock );
+} // __kmp_itt_critical_acquired
+
+void
+__kmp_itt_critical_releasing( kmp_user_lock_p lock ) {
+ __itt_sync_releasing( lock );
+} // __kmp_itt_critical_releasing
+
+void
+__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_fini( lock, "OMP Critical" );
+} // __kmp_itt_critical_destroyed
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Single reporting.
+ ------------------------------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_single_start( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) {
+ kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) );
+ ident_t * loc = thr->th.th_ident;
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ kmp_str_buf_t name;
+ __kmp_str_buf_init( & name );
+ __kmp_str_buf_print( & name, "OMP Single-%s", src );
+ KMP_ITT_DEBUG_LOCK();
+ thr->th.th_itt_mark_single = __itt_mark_create( name.str );
+ KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single );
+ __kmp_str_buf_free( & name );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_mark( thr->th.th_itt_mark_single, NULL );
+ KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single );
+ }; // if
+#endif
+} // __kmp_itt_single_start
+
+void
+__kmp_itt_single_end( int gtid ) {
+#if USE_ITT_NOTIFY
+ __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single;
+ KMP_ITT_DEBUG_LOCK();
+ __itt_mark_off( mark );
+ KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark );
+#endif
+} // __kmp_itt_single_end
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Ordered reporting.
+
+    __kmp_itt_ordered_init is called by each thread *before* first using the sync
+    object. The ITT team would like it to be called once, but that requires extra synchronization.
+
+    __kmp_itt_ordered_prep is called when a thread is going to enter an ordered section
+    (before synchronization).
+
+ __kmp_itt_ordered_start is called just before entering user code (after
+ synchronization).
+
+ __kmp_itt_ordered_end is called after returning from user code.
+
+ Sync object is th->th.th_dispatch->th_dispatch_sh_current.
+
+    Events are not generated in case of a serialized team. An illustrative call order is sketched
+    after this comment.
+ ------------------------------------------------------------------------------------------------
+*/
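+
+/*
+    Illustrative sketch (comment only, not compiled): the per-thread call order around an ordered
+    section in a non-serialized team.
+
+        __kmp_itt_ordered_init( gtid );    // before the first use of the sync object
+        __kmp_itt_ordered_prep( gtid );    // about to wait for the ordered turn
+        // ... wait until the previous iteration leaves the ordered section ...
+        __kmp_itt_ordered_start( gtid );   // entering user code
+        // ... user code of the ordered section ...
+        __kmp_itt_ordered_end( gtid );     // returned from user code
+*/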
+
+void
+__kmp_itt_ordered_init( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
+ ident_t const * loc = thr->th.th_ident;
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ __itt_sync_create(
+ thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0
+ );
+ }; // if
+#endif
+} // __kmp_itt_ordered_init
+
+void
+__kmp_itt_ordered_prep( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_prep
+
+void
+__kmp_itt_ordered_start( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_start
+
+void
+__kmp_itt_ordered_end( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_end
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Threads reporting.
+ ------------------------------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_thread_ignore() {
+ __itt_thr_ignore();
+} // __kmp_itt_thread_ignore
+
+void
+__kmp_itt_thread_name( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_thr_name_set_ptr ) {
+ kmp_str_buf_t name;
+ __kmp_str_buf_init( & name );
+ if( KMP_MASTER_GTID(gtid) ) {
+ __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid );
+ } else {
+ __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid );
+ }
+ KMP_ITT_DEBUG_LOCK();
+ __itt_thr_name_set( name.str, name.used );
+ KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str );
+ __kmp_str_buf_free( & name );
+ }; // if
+#endif
+} // __kmp_itt_thread_name
+
+
+/*
+ --------------------------------------------------------------------------
+ System object reporting.
+
+    ITT catches operations on system sync objects (such as Windows* OS API critical
+    sections and events on IA-32 architecture). We only need to specify a
+    name ("OMP Scheduler") for the object to let ITT know it is an object used
+    by the OpenMP RTL for internal purposes.
+ --------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_system_object_created( void * object, char const * name ) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, "OMP Scheduler", name, 0 );
+ KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name );
+#endif
+} // __kmp_itt_system_object_created
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Stack stitching api.
+
+ Master calls "create" and put the stitching id into team structure.
+ Workers read the stitching id and call "enter" / "leave" api.
+ Master calls "destroy" at the end of the parallel region.
+ ------------------------------------------------------------------------------------------------
+*/
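+
+/*
+    Illustrative sketch (comment only, not compiled): t_stack_id is a hypothetical team field used
+    here just to show how the stitching id travels from the master to the workers.
+
+        // master, when setting up the parallel region:
+        team->t.t_stack_id = __kmp_itt_stack_caller_create();
+        // each worker, around its outlined body:
+        __kmp_itt_stack_callee_enter( team->t.t_stack_id );
+        // ... run the microtask ...
+        __kmp_itt_stack_callee_leave( team->t.t_stack_id );
+        // master, at the end of the parallel region:
+        __kmp_itt_stack_caller_destroy( team->t.t_stack_id );
+*/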
+
+__itt_caller
+__kmp_itt_stack_caller_create()
+{
+#if USE_ITT_NOTIFY
+ if ( !__itt_stack_caller_create_ptr )
+ return NULL;
+ KMP_ITT_DEBUG_LOCK();
+ __itt_caller id = __itt_stack_caller_create();
+ KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id );
+ return id;
+#endif
+ return NULL;
+}
+
+void
+__kmp_itt_stack_caller_destroy( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_caller_destroy_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_caller_destroy( id );
+ KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id );
+ }
+#endif
+}
+
+void
+__kmp_itt_stack_callee_enter( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_callee_enter_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_callee_enter( id );
+ KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id );
+ }
+#endif
+}
+
+void
+__kmp_itt_stack_callee_leave( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_callee_leave_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_callee_leave( id );
+ KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id );
+ }
+#endif
+}
+
+#endif /* USE_ITT_BUILD */