author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /contrib/libs/cxxsupp/openmp/kmp_lock.h
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/kmp_lock.h')
-rw-r--r-- | contrib/libs/cxxsupp/openmp/kmp_lock.h | 2546 |
1 file changed, 1273 insertions, 1273 deletions
diff --git a/contrib/libs/cxxsupp/openmp/kmp_lock.h b/contrib/libs/cxxsupp/openmp/kmp_lock.h index d79db4ae96..8cd01d3981 100644 --- a/contrib/libs/cxxsupp/openmp/kmp_lock.h +++ b/contrib/libs/cxxsupp/openmp/kmp_lock.h @@ -1,1273 +1,1273 @@ -/* - * kmp_lock.h -- lock header file - */ - - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef KMP_LOCK_H -#define KMP_LOCK_H - -#include <limits.h> // CHAR_BIT -#include <stddef.h> // offsetof - -#include "kmp_os.h" -#include "kmp_debug.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// ---------------------------------------------------------------------------- -// Have to copy these definitions from kmp.h because kmp.h cannot be included -// due to circular dependencies. Will undef these at end of file. - -#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) -#define KMP_GTID_DNE (-2) - -// Forward declaration of ident and ident_t - -struct ident; -typedef struct ident ident_t; - -// End of copied code. -// ---------------------------------------------------------------------------- - -// -// We need to know the size of the area we can assume that the compiler(s) -// allocated for obects of type omp_lock_t and omp_nest_lock_t. The Intel -// compiler always allocates a pointer-sized area, as does visual studio. -// -// gcc however, only allocates 4 bytes for regular locks, even on 64-bit -// intel archs. It allocates at least 8 bytes for nested lock (more on -// recent versions), but we are bounded by the pointer-sized chunks that -// the Intel compiler allocates. -// - -#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) -# define OMP_LOCK_T_SIZE sizeof(int) -# define OMP_NEST_LOCK_T_SIZE sizeof(void *) -#else -# define OMP_LOCK_T_SIZE sizeof(void *) -# define OMP_NEST_LOCK_T_SIZE sizeof(void *) -#endif - -// -// The Intel compiler allocates a 32-byte chunk for a critical section. -// Both gcc and visual studio only allocate enough space for a pointer. -// Sometimes we know that the space was allocated by the Intel compiler. -// -#define OMP_CRITICAL_SIZE sizeof(void *) -#define INTEL_CRITICAL_SIZE 32 - -// -// lock flags -// -typedef kmp_uint32 kmp_lock_flags_t; - -#define kmp_lf_critical_section 1 - -// -// When a lock table is used, the indices are of kmp_lock_index_t -// -typedef kmp_uint32 kmp_lock_index_t; - -// -// When memory allocated for locks are on the lock pool (free list), -// it is treated as structs of this type. -// -struct kmp_lock_pool { - union kmp_user_lock *next; - kmp_lock_index_t index; -}; - -typedef struct kmp_lock_pool kmp_lock_pool_t; - - -extern void __kmp_validate_locks( void ); - - -// ---------------------------------------------------------------------------- -// -// There are 5 lock implementations: -// -// 1. Test and set locks. -// 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture) -// 3. Ticket (Lamport bakery) locks. -// 4. Queuing locks (with separate spin fields). -// 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks -// -// and 3 lock purposes: -// -// 1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time. -// These do not require non-negative global thread ID's. -// 2. 
Internal RTL locks -- Used everywhere else in the RTL -// 3. User locks (includes critical sections) -// -// ---------------------------------------------------------------------------- - - -// ============================================================================ -// Lock implementations. -// ============================================================================ - - -// ---------------------------------------------------------------------------- -// Test and set locks. -// -// Non-nested test and set locks differ from the other lock kinds (except -// futex) in that we use the memory allocated by the compiler for the lock, -// rather than a pointer to it. -// -// On lin32, lin_32e, and win_32, the space allocated may be as small as 4 -// bytes, so we have to use a lock table for nested locks, and avoid accessing -// the depth_locked field for non-nested locks. -// -// Information normally available to the tools, such as lock location, -// lock usage (normal lock vs. critical section), etc. is not available with -// test and set locks. -// ---------------------------------------------------------------------------- - -struct kmp_base_tas_lock { - volatile kmp_int32 poll; // 0 => unlocked - // locked: (gtid+1) of owning thread - kmp_int32 depth_locked; // depth locked, for nested locks only -}; - -typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; - -union kmp_tas_lock { - kmp_base_tas_lock_t lk; - kmp_lock_pool_t pool; // make certain struct is large enough - double lk_align; // use worst case alignment - // no cache line padding -}; - -typedef union kmp_tas_lock kmp_tas_lock_t; - -// -// Static initializer for test and set lock variables. Usage: -// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock ); -// -#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } } - -extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck ); -extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ); - -extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck ); -extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ); - -#define KMP_LOCK_RELEASED 1 -#define KMP_LOCK_STILL_HELD 0 -#define KMP_LOCK_ACQUIRED_FIRST 1 -#define KMP_LOCK_ACQUIRED_NEXT 0 - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -// ---------------------------------------------------------------------------- -// futex locks. futex locks are only available on Linux* OS. -// -// Like non-nested test and set lock, non-nested futex locks use the memory -// allocated by the compiler for the lock, rather than a pointer to it. -// -// Information normally available to the tools, such as lock location, -// lock usage (normal lock vs. critical section), etc. is not available with -// test and set locks. With non-nested futex locks, the lock owner is not -// even available. 
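// --- Illustrative aside (editor's sketch, not part of the diffed kmp_lock.h) ---
// The test-and-set locks above keep the whole lock state in the `poll` word:
// 0 means unlocked and gtid+1 identifies the owner. A minimal acquire/release
// in that style could look as follows; the example_ names are hypothetical,
// KMP_COMPARE_AND_STORE_ACQ32 and TCW_4 are assumed to come from kmp_os.h, and
// the real __kmp_acquire_tas_lock additionally spins and yields.
static inline void example_tas_acquire( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    // try to swing poll from 0 (free) to gtid+1 (owned by this thread)
    while ( ( lck->lk.poll != 0 ) ||
            ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->lk.poll), 0, gtid + 1 ) ) ) {
        /* back-off elided; the runtime uses KMP_YIELD / KMP_YIELD_SPIN here */
    }
}

static inline void example_tas_release( kmp_tas_lock_t *lck )
{
    TCW_4( lck->lk.poll, 0 );   // store 0 => unlocked (real code uses a release store)
}
// --- End of illustrative aside ---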
-// ---------------------------------------------------------------------------- - -struct kmp_base_futex_lock { - volatile kmp_int32 poll; // 0 => unlocked - // 2*(gtid+1) of owning thread, 0 if unlocked - // locked: (gtid+1) of owning thread - kmp_int32 depth_locked; // depth locked, for nested locks only -}; - -typedef struct kmp_base_futex_lock kmp_base_futex_lock_t; - -union kmp_futex_lock { - kmp_base_futex_lock_t lk; - kmp_lock_pool_t pool; // make certain struct is large enough - double lk_align; // use worst case alignment - // no cache line padding -}; - -typedef union kmp_futex_lock kmp_futex_lock_t; - -// -// Static initializer for futex lock variables. Usage: -// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock ); -// -#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } } - -extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck ); -extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ); - -extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck ); -extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ); - -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - - -// ---------------------------------------------------------------------------- -// Ticket locks. -// ---------------------------------------------------------------------------- - -struct kmp_base_ticket_lock { - // `initialized' must be the first entry in the lock data structure! - volatile union kmp_ticket_lock * initialized; // points to the lock union if in initialized state - ident_t const * location; // Source code location of omp_init_lock(). - volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires - volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock - volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked, for nested locks only - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; - -typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t; - -union KMP_ALIGN_CACHE kmp_ticket_lock { - kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing. - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ]; -}; - -typedef union kmp_ticket_lock kmp_ticket_lock_t; - -// -// Static initializer for simple ticket lock variables. Usage: -// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock ); -// Note the macro argument. It is important to make var properly initialized. 
-// -#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } } - -extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck ); -extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ); - -extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck ); -extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ); - - -// ---------------------------------------------------------------------------- -// Queuing locks. -// ---------------------------------------------------------------------------- - -#if KMP_USE_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_info; - -typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t; - -#if KMP_DEBUG_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_statistics { - /* So we can get stats from locks that haven't been destroyed. */ - kmp_adaptive_lock_info_t * next; - kmp_adaptive_lock_info_t * prev; - - /* Other statistics */ - kmp_uint32 successfulSpeculations; - kmp_uint32 hardFailedSpeculations; - kmp_uint32 softFailedSpeculations; - kmp_uint32 nonSpeculativeAcquires; - kmp_uint32 nonSpeculativeAcquireAttempts; - kmp_uint32 lemmingYields; -}; - -typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t; - -extern void __kmp_print_speculative_stats(); -extern void __kmp_init_speculative_stats(); - -#endif // KMP_DEBUG_ADAPTIVE_LOCKS - -struct kmp_adaptive_lock_info -{ - /* Values used for adaptivity. - * Although these are accessed from multiple threads we don't access them atomically, - * because if we miss updates it probably doesn't matter much. (It just affects our - * decision about whether to try speculation on the lock). - */ - kmp_uint32 volatile badness; - kmp_uint32 volatile acquire_attempts; - /* Parameters of the lock. */ - kmp_uint32 max_badness; - kmp_uint32 max_soft_retries; - -#if KMP_DEBUG_ADAPTIVE_LOCKS - kmp_adaptive_lock_statistics_t volatile stats; -#endif -}; - -#endif // KMP_USE_ADAPTIVE_LOCKS - - -struct kmp_base_queuing_lock { - - // `initialized' must be the first entry in the lock data structure! - volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state. - - ident_t const * location; // Source code location of omp_init_lock(). - - KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned! 
- - volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty - // Must be no padding here since head/tail used in 8-byte CAS - volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty - // Decl order assumes little endian - // bakery-style lock - volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires - volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock - volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked, for nested locks only - - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; - -typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t; - -KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 ); - -union KMP_ALIGN_CACHE kmp_queuing_lock { - kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing. - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ]; -}; - -typedef union kmp_queuing_lock kmp_queuing_lock_t; - -extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ); -extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ); - -extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck ); -extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ); - -#if KMP_USE_ADAPTIVE_LOCKS - -// ---------------------------------------------------------------------------- -// Adaptive locks. -// ---------------------------------------------------------------------------- -struct kmp_base_adaptive_lock { - kmp_base_queuing_lock qlk; - KMP_ALIGN(CACHE_LINE) - kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock -}; - -typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t; - -union KMP_ALIGN_CACHE kmp_adaptive_lock { - kmp_base_adaptive_lock_t lk; - kmp_lock_pool_t pool; - double lk_align; - char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ]; -}; -typedef union kmp_adaptive_lock kmp_adaptive_lock_t; - -# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk) - -#endif // KMP_USE_ADAPTIVE_LOCKS - -// ---------------------------------------------------------------------------- -// DRDPA ticket locks. -// ---------------------------------------------------------------------------- - -struct kmp_base_drdpa_lock { - // - // All of the fields on the first cache line are only written when - // initializing or reconfiguring the lock. These are relatively rare - // operations, so data from the first cache line will usually stay - // resident in the cache of each thread trying to acquire the lock. - // - // initialized must be the first entry in the lock data structure! - // - KMP_ALIGN_CACHE - - volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state - ident_t const * location; // Source code location of omp_init_lock(). 
- volatile struct kmp_lock_poll { - kmp_uint64 poll; - } * volatile polls; - volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op - kmp_uint64 cleanup_ticket; // thread with cleanup ticket - volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls - kmp_uint32 num_polls; // must be power of 2 - - // - // next_ticket it needs to exist in a separate cache line, as it is - // invalidated every time a thread takes a new ticket. - // - KMP_ALIGN_CACHE - - volatile kmp_uint64 next_ticket; - - // - // now_serving is used to store our ticket value while we hold the lock. - // It has a slightly different meaning in the DRDPA ticket locks (where - // it is written by the acquiring thread) than it does in the simple - // ticket locks (where it is written by the releasing thread). - // - // Since now_serving is only read an written in the critical section, - // it is non-volatile, but it needs to exist on a separate cache line, - // as it is invalidated at every lock acquire. - // - // Likewise, the vars used for nested locks (owner_id and depth_locked) - // are only written by the thread owning the lock, so they are put in - // this cache line. owner_id is read by other threads, so it must be - // declared volatile. - // - KMP_ALIGN_CACHE - - kmp_uint64 now_serving; // doesn't have to be volatile - volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock -}; - -typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t; - -union KMP_ALIGN_CACHE kmp_drdpa_lock { - kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing. */ - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ]; -}; - -typedef union kmp_drdpa_lock kmp_drdpa_lock_t; - -extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ); -extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ); - -extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); -extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); - - -// ============================================================================ -// Lock purposes. -// ============================================================================ - - -// ---------------------------------------------------------------------------- -// Bootstrap locks. -// ---------------------------------------------------------------------------- - -// Bootstrap locks -- very few locks used at library initialization time. -// Bootstrap locks are currently implemented as ticket locks. -// They could also be implemented as test and set lock, but cannot be -// implemented with other lock kinds as they require gtids which are not -// available at initialization time. 
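// --- Illustrative aside (editor's sketch, not part of the diffed kmp_lock.h) ---
// The bootstrap locks described above must work before global thread ids
// exist, which is why the wrappers declared just below pass KMP_GTID_DNE to
// the underlying ticket lock. A minimal usage sketch, with a hypothetical
// file-scope lock named example_startup_lock:
static kmp_bootstrap_lock_t example_startup_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER( example_startup_lock );

static void example_touch_startup_state( void )
{
    __kmp_acquire_bootstrap_lock( &example_startup_lock );  // no gtid needed
    /* ... read or write library startup/shutdown state ... */
    __kmp_release_bootstrap_lock( &example_startup_lock );
}
// --- End of illustrative aside ---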
- -typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; - -#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) ) - -static inline int -__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE ); -} - -static inline int -__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - return __kmp_test_ticket_lock( lck, KMP_GTID_DNE ); -} - -static inline void -__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_release_ticket_lock( lck, KMP_GTID_DNE ); -} - -static inline void -__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_init_ticket_lock( lck ); -} - -static inline void -__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_destroy_ticket_lock( lck ); -} - - -// ---------------------------------------------------------------------------- -// Internal RTL locks. -// ---------------------------------------------------------------------------- - -// -// Internal RTL locks are also implemented as ticket locks, for now. -// -// FIXME - We should go through and figure out which lock kind works best for -// each internal lock, and use the type declaration and function calls for -// that explicit lock kind (and get rid of this section). -// - -typedef kmp_ticket_lock_t kmp_lock_t; - -static inline int -__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_acquire_ticket_lock( lck, gtid ); -} - -static inline int -__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_test_ticket_lock( lck, gtid ); -} - -static inline void -__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - __kmp_release_ticket_lock( lck, gtid ); -} - -static inline void -__kmp_init_lock( kmp_lock_t *lck ) -{ - __kmp_init_ticket_lock( lck ); -} - -static inline void -__kmp_destroy_lock( kmp_lock_t *lck ) -{ - __kmp_destroy_ticket_lock( lck ); -} - - -// ---------------------------------------------------------------------------- -// User locks. -// ---------------------------------------------------------------------------- - -// -// Do not allocate objects of type union kmp_user_lock!!! -// This will waste space unless __kmp_user_lock_kind == lk_drdpa. -// Instead, check the value of __kmp_user_lock_kind and allocate objects of -// the type of the appropriate union member, and cast their addresses to -// kmp_user_lock_p. -// - -enum kmp_lock_kind { - lk_default = 0, - lk_tas, -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - lk_futex, -#endif -#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - lk_hle, - lk_rtm, -#endif - lk_ticket, - lk_queuing, - lk_drdpa, -#if KMP_USE_ADAPTIVE_LOCKS - lk_adaptive -#endif // KMP_USE_ADAPTIVE_LOCKS -}; - -typedef enum kmp_lock_kind kmp_lock_kind_t; - -extern kmp_lock_kind_t __kmp_user_lock_kind; - -union kmp_user_lock { - kmp_tas_lock_t tas; -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - kmp_futex_lock_t futex; -#endif - kmp_ticket_lock_t ticket; - kmp_queuing_lock_t queuing; - kmp_drdpa_lock_t drdpa; -#if KMP_USE_ADAPTIVE_LOCKS - kmp_adaptive_lock_t adaptive; -#endif // KMP_USE_ADAPTIVE_LOCKS - kmp_lock_pool_t pool; -}; - -typedef union kmp_user_lock *kmp_user_lock_p; - -#if ! 
KMP_USE_DYNAMIC_LOCK - -extern size_t __kmp_base_user_lock_size; -extern size_t __kmp_user_lock_size; - -extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ); - -static inline kmp_int32 -__kmp_get_user_lock_owner( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL ); - return ( *__kmp_get_user_lock_owner_ )( lck ); -} - -extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -#define __kmp_acquire_user_lock_with_checks(lck,gtid) \ - if (__kmp_user_lock_kind == lk_tas) { \ - if ( __kmp_env_consistency_check ) { \ - char const * const func = "omp_set_lock"; \ - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \ - && lck->tas.lk.depth_locked != -1 ) { \ - KMP_FATAL( LockNestableUsedAsSimple, func ); \ - } \ - if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \ - KMP_FATAL( LockIsAlreadyOwned, func ); \ - } \ - } \ - if ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE( lck ); \ - KMP_INIT_YIELD( spins ); \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - while ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - } \ - } \ - KMP_FSYNC_ACQUIRED( lck ); \ - } else { \ - KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \ - ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \ - } - -#else -static inline int -__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); - return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); -} -#endif - -extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -#include "kmp_i18n.h" /* AC: KMP_FATAL definition */ -extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ -static inline int -__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - if ( __kmp_user_lock_kind == lk_tas ) { - if ( __kmp_env_consistency_check ) { - char const * const func = "omp_test_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && lck->tas.lk.depth_locked != -1 ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - } - return ( ( lck->tas.lk.poll == 0 ) && - KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); - } else { - KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); - } -} -#else -static inline int -__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); -} -#endif - -extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -static inline void -__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL ); - ( 
*__kmp_release_user_lock_with_checks_ ) ( lck, gtid ); -} - -extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ); - -static inline void -__kmp_init_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL ); - ( *__kmp_init_user_lock_with_checks_ )( lck ); -} - -// -// We need a non-checking version of destroy lock for when the RTL is -// doing the cleanup as it can't always tell if the lock is nested or not. -// -extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ); - -static inline void -__kmp_destroy_user_lock( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL ); - ( *__kmp_destroy_user_lock_ )( lck ); -} - -extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ); - -static inline void -__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL ); - ( *__kmp_destroy_user_lock_with_checks_ )( lck ); -} - -extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) - -#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \ - if (__kmp_user_lock_kind == lk_tas) { \ - if ( __kmp_env_consistency_check ) { \ - char const * const func = "omp_set_nest_lock"; \ - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \ - && lck->tas.lk.depth_locked == -1 ) { \ - KMP_FATAL( LockSimpleUsedAsNestable, func ); \ - } \ - } \ - if ( lck->tas.lk.poll - 1 == gtid ) { \ - lck->tas.lk.depth_locked += 1; \ - *depth = KMP_LOCK_ACQUIRED_NEXT; \ - } else { \ - if ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE( lck ); \ - KMP_INIT_YIELD( spins ); \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - while ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - } \ - } \ - lck->tas.lk.depth_locked = 1; \ - *depth = KMP_LOCK_ACQUIRED_FIRST; \ - } \ - KMP_FSYNC_ACQUIRED( lck ); \ - } else { \ - KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \ - *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \ - } - -#else -static inline void -__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth ) -{ - KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); - *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); -} -#endif - -extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) -static inline int -__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - if ( __kmp_user_lock_kind == lk_tas ) { - int retval; - if ( __kmp_env_consistency_check ) { - char const * const func = "omp_test_nest_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) - && lck->tas.lk.depth_locked == -1 ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - } - KMP_DEBUG_ASSERT( gtid >= 0 ); - if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ - return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ - } - retval = ( ( lck->tas.lk.poll == 0 ) && - KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); - if ( retval ) { - KMP_MB(); - lck->tas.lk.depth_locked = 1; - } - return retval; - } else { - KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); - } -} -#else -static inline int -__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); -} -#endif - -extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -static inline int -__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL ); - return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid ); -} - -extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); - -static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL ); - ( *__kmp_init_nested_user_lock_with_checks_ )( lck ); -} - -extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); - -static inline void -__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL ); - ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck ); -} - -// -// user lock functions which do not necessarily exist for all lock kinds. -// -// The "set" functions usually have wrapper routines that check for a NULL set -// function pointer and call it if non-NULL. -// -// In some cases, it makes sense to have a "get" wrapper function check for a -// NULL get function pointer and return NULL / invalid value / error code if -// the function pointer is NULL. 
-// -// In other cases, the calling code really should differentiate between an -// unimplemented function and one that is implemented but returning NULL / -// invalied value. If this is the case, no get function wrapper exists. -// - -extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ); - -// no set function; fields set durining local allocation - -extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ); - -static inline const ident_t * -__kmp_get_user_lock_location( kmp_user_lock_p lck ) -{ - if ( __kmp_get_user_lock_location_ != NULL ) { - return ( *__kmp_get_user_lock_location_ )( lck ); - } - else { - return NULL; - } -} - -extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ); - -static inline void -__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc ) -{ - if ( __kmp_set_user_lock_location_ != NULL ) { - ( *__kmp_set_user_lock_location_ )( lck, loc ); - } -} - -extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ); - -extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ); - -static inline void -__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags ) -{ - if ( __kmp_set_user_lock_flags_ != NULL ) { - ( *__kmp_set_user_lock_flags_ )( lck, flags ); - } -} - -// -// The fuction which sets up all of the vtbl pointers for kmp_user_lock_t. -// -extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ); - -// -// Macros for binding user lock functions. -// -#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \ - __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_acquire##nest##kind##_##suffix; \ - __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_release##nest##kind##_##suffix; \ - __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_test##nest##kind##_##suffix; \ - __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ - __kmp_init##nest##kind##_##suffix; \ - __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ - __kmp_destroy##nest##kind##_##suffix; \ -} - -#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock) -#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks) -#define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock) -#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks) - -// ---------------------------------------------------------------------------- -// User lock table & lock allocation -// ---------------------------------------------------------------------------- - -/* - On 64-bit Linux* OS (and OS X*) GNU compiler allocates only 4 bytems memory for lock variable, which - is not enough to store a pointer, so we have to use lock indexes instead of pointers and - maintain lock table to map indexes to pointers. - - - Note: The first element of the table is not a pointer to lock! It is a pointer to previously - allocated table (or NULL if it is the first table). - - Usage: - - if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE - Lock table is fully utilized. User locks are indexes, so table is - used on user lock operation. 
- Note: it may be the case (lin_32) that we don't need to use a lock - table for regular locks, but do need the table for nested locks. - } - else { - Lock table initialized but not actually used. - } -*/ - -struct kmp_lock_table { - kmp_lock_index_t used; // Number of used elements - kmp_lock_index_t allocated; // Number of allocated elements - kmp_user_lock_p * table; // Lock table. -}; - -typedef struct kmp_lock_table kmp_lock_table_t; - -extern kmp_lock_table_t __kmp_user_lock_table; -extern kmp_user_lock_p __kmp_lock_pool; - -struct kmp_block_of_locks { - struct kmp_block_of_locks * next_block; - void * locks; -}; - -typedef struct kmp_block_of_locks kmp_block_of_locks_t; - -extern kmp_block_of_locks_t *__kmp_lock_blocks; -extern int __kmp_num_locks_in_block; - -extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags ); -extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ); -extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func ); -extern void __kmp_cleanup_user_locks(); - -#define KMP_CHECK_USER_LOCK_INIT() \ - { \ - if ( ! TCR_4( __kmp_init_user_locks ) ) { \ - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \ - if ( ! TCR_4( __kmp_init_user_locks ) ) { \ - TCW_4( __kmp_init_user_locks, TRUE ); \ - } \ - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \ - } \ - } - -#endif // KMP_USE_DYNAMIC_LOCK - -#undef KMP_PAD -#undef KMP_GTID_DNE - -#if KMP_USE_DYNAMIC_LOCK - -// -// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current -// compatibility. Essential functionality of this new code is dynamic dispatch, but it also -// implements (or enables implementation of) hinted user lock and critical section which will be -// part of OMP 4.1 soon. -// -// Lock type can be decided at creation time (i.e., lock initialization), and subsequent lock -// function call on the created lock object requires type extraction and call through jump table -// using the extracted type. This type information is stored in two different ways depending on -// the size of the lock object, and we differentiate lock types by this size requirement - direct -// and indirect locks. -// -// Direct locks: -// A direct lock object fits into the space created by the compiler for an omp_lock_t object, and -// TAS/Futex lock falls into this category. We use low one byte of the lock object as the storage -// for the lock type, and appropriate bit operation is required to access the data meaningful to -// the lock algorithms. Also, to differentiate direct lock from indirect lock, 1 is written to LSB -// of the lock object. The newly introduced "hle" lock is also a direct lock. -// -// Indirect locks: -// An indirect lock object requires more space than the compiler-generated space, and it should be -// allocated from heap. Depending on the size of the compiler-generated space for the lock (i.e., -// size of omp_lock_t), this omp_lock_t object stores either the address of the heap-allocated -// indirect lock (void * fits in the object) or an index to the indirect lock table entry that -// holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this category, and the newly -// introduced "rtm" lock is also an indirect lock which was implemented on top of the Queuing lock. 
-// When the omp_lock_t object holds an index (not lock address), 0 is written to LSB to -// differentiate the lock from a direct lock, and the remaining part is the actual index to the -// indirect lock table. -// - -#include <stdint.h> // for uintptr_t - -// Shortcuts -#define KMP_USE_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) -#define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1 -#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0 - -// List of lock definitions; all nested locks are indirect locks. -// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE. -// All nested locks are indirect lock types. -#if KMP_USE_TSX -# if KMP_USE_FUTEX -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ - m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# else -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ - m(nested_tas, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# endif // KMP_USE_FUTEX -# define KMP_LAST_D_LOCK lockseq_hle -#else -# if KMP_USE_FUTEX -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ - m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# define KMP_LAST_D_LOCK lockseq_futex -# else -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ - m(nested_tas, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# define KMP_LAST_D_LOCK lockseq_tas -# endif // KMP_USE_FUTEX -#endif // KMP_USE_TSX - -// Information used in dynamic dispatch -#define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks -#define KMP_FIRST_D_LOCK lockseq_tas -#define KMP_FIRST_I_LOCK lockseq_ticket -#define KMP_LAST_I_LOCK lockseq_nested_drdpa -#define KMP_NUM_I_LOCKS (locktag_nested_drdpa+1) // number of indirect lock types - -// Base type for dynamic locks. -typedef kmp_uint32 kmp_dyna_lock_t; - -// Lock sequence that enumerates all lock kinds. -// Always make this enumeration consistent with kmp_lockseq_t in the include directory. -typedef enum { - lockseq_indirect = 0, -#define expand_seq(l,a) lockseq_##l, - KMP_FOREACH_D_LOCK(expand_seq, 0) - KMP_FOREACH_I_LOCK(expand_seq, 0) -#undef expand_seq -} kmp_dyna_lockseq_t; - -// Enumerates indirect lock tags. -typedef enum { -#define expand_tag(l,a) locktag_##l, - KMP_FOREACH_I_LOCK(expand_tag, 0) -#undef expand_tag -} kmp_indirect_locktag_t; - -// Utility macros that extract information from lock sequences. -#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK) -#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK) -#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK) -#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1) - -// Enumerates direct lock tags starting from indirect tag. 
-typedef enum { -#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l), - KMP_FOREACH_D_LOCK(expand_tag, 0) -#undef expand_tag -} kmp_direct_locktag_t; - -// Indirect lock type -typedef struct { - kmp_user_lock_p lock; - kmp_indirect_locktag_t type; -} kmp_indirect_lock_t; - -// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking. -extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); -extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *); -extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); -extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); -extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); - -// Function tables for indirect locks. Set/unset/test differentiate functions with/withuot consistency checking. -extern void (*__kmp_indirect_init[])(kmp_user_lock_p); -extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p); -extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); -extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32); -extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32); - -// Extracts direct lock tag from a user lock pointer -#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1)) - -// Extracts indirect lock index from a user lock pointer -#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1) - -// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type). -#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)] - -// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type). -#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type] - -// Initializes a direct lock with the given lock pointer and lock sequence. -#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq) - -// Initializes an indirect lock with the given lock pointer and lock sequence. -#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq) - -// Returns "free" lock value for the given lock type. -#define KMP_LOCK_FREE(type) (locktag_##type) - -// Returns "busy" lock value for the given lock teyp. -#define KMP_LOCK_BUSY(v, type) ((v)<<KMP_LOCK_SHIFT | locktag_##type) - -// Returns lock value after removing (shifting) lock tag. -#define KMP_LOCK_STRIP(v) ((v)>>KMP_LOCK_SHIFT) - -// Initializes global states and data structures for managing dynamic user locks. -extern void __kmp_init_dynamic_user_locks(); - -// Allocates and returns an indirect lock with the given indirect lock tag. -extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t); - -// Cleans up global states and data structures for managing dynamic user locks. -extern void __kmp_cleanup_indirect_user_locks(); - -// Default user lock sequence when not using hinted locks. -extern kmp_dyna_lockseq_t __kmp_user_lock_seq; - -// Jump table for "set lock location", available only for indirect locks. -extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *); -#define KMP_SET_I_LOCK_LOCATION(lck, loc) { \ - if (__kmp_indirect_set_location[(lck)->type] != NULL) \ - __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \ -} - -// Jump table for "set lock flags", available only for indirect locks. 
-extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t); -#define KMP_SET_I_LOCK_FLAGS(lck, flag) { \ - if (__kmp_indirect_set_flags[(lck)->type] != NULL) \ - __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \ -} - -// Jump table for "get lock location", available only for indirect locks. -extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p); -#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \ - ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \ - : NULL ) - -// Jump table for "get lock flags", available only for indirect locks. -extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p); -#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \ - ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \ - : NULL ) - -#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together - -// Lock table for indirect locks. -typedef struct kmp_indirect_lock_table { - kmp_indirect_lock_t **table; // blocks of indirect locks allocated - kmp_lock_index_t size; // size of the indirect lock table - kmp_lock_index_t next; // index to the next lock to be allocated -} kmp_indirect_lock_table_t; - -extern kmp_indirect_lock_table_t __kmp_i_lock_table; - -// Returns the indirect lock associated with the given index. -#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK) - -// Number of locks in a lock block, which is fixed to "1" now. -// TODO: No lock block implementation now. If we do support, we need to manage lock block data -// structure for each indirect lock type. -extern int __kmp_num_locks_in_block; - -// Fast lock table lookup without consistency checking -#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \ - ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \ - : *((kmp_indirect_lock_t **)(l)) ) - -// Used once in kmp_error.c -extern kmp_int32 -__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32); - -#else // KMP_USE_DYNAMIC_LOCK - -# define KMP_LOCK_BUSY(v, type) (v) -# define KMP_LOCK_FREE(type) 0 -# define KMP_LOCK_STRIP(v) (v) - -#endif // KMP_USE_DYNAMIC_LOCK - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif /* KMP_LOCK_H */ - +/* + * kmp_lock.h -- lock header file + */ + + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef KMP_LOCK_H +#define KMP_LOCK_H + +#include <limits.h> // CHAR_BIT +#include <stddef.h> // offsetof + +#include "kmp_os.h" +#include "kmp_debug.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// ---------------------------------------------------------------------------- +// Have to copy these definitions from kmp.h because kmp.h cannot be included +// due to circular dependencies. Will undef these at end of file. + +#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) +#define KMP_GTID_DNE (-2) + +// Forward declaration of ident and ident_t + +struct ident; +typedef struct ident ident_t; + +// End of copied code. 
+// ---------------------------------------------------------------------------- + +// +// We need to know the size of the area we can assume that the compiler(s) +// allocated for obects of type omp_lock_t and omp_nest_lock_t. The Intel +// compiler always allocates a pointer-sized area, as does visual studio. +// +// gcc however, only allocates 4 bytes for regular locks, even on 64-bit +// intel archs. It allocates at least 8 bytes for nested lock (more on +// recent versions), but we are bounded by the pointer-sized chunks that +// the Intel compiler allocates. +// + +#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) +# define OMP_LOCK_T_SIZE sizeof(int) +# define OMP_NEST_LOCK_T_SIZE sizeof(void *) +#else +# define OMP_LOCK_T_SIZE sizeof(void *) +# define OMP_NEST_LOCK_T_SIZE sizeof(void *) +#endif + +// +// The Intel compiler allocates a 32-byte chunk for a critical section. +// Both gcc and visual studio only allocate enough space for a pointer. +// Sometimes we know that the space was allocated by the Intel compiler. +// +#define OMP_CRITICAL_SIZE sizeof(void *) +#define INTEL_CRITICAL_SIZE 32 + +// +// lock flags +// +typedef kmp_uint32 kmp_lock_flags_t; + +#define kmp_lf_critical_section 1 + +// +// When a lock table is used, the indices are of kmp_lock_index_t +// +typedef kmp_uint32 kmp_lock_index_t; + +// +// When memory allocated for locks are on the lock pool (free list), +// it is treated as structs of this type. +// +struct kmp_lock_pool { + union kmp_user_lock *next; + kmp_lock_index_t index; +}; + +typedef struct kmp_lock_pool kmp_lock_pool_t; + + +extern void __kmp_validate_locks( void ); + + +// ---------------------------------------------------------------------------- +// +// There are 5 lock implementations: +// +// 1. Test and set locks. +// 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture) +// 3. Ticket (Lamport bakery) locks. +// 4. Queuing locks (with separate spin fields). +// 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks +// +// and 3 lock purposes: +// +// 1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time. +// These do not require non-negative global thread ID's. +// 2. Internal RTL locks -- Used everywhere else in the RTL +// 3. User locks (includes critical sections) +// +// ---------------------------------------------------------------------------- + + +// ============================================================================ +// Lock implementations. +// ============================================================================ + + +// ---------------------------------------------------------------------------- +// Test and set locks. +// +// Non-nested test and set locks differ from the other lock kinds (except +// futex) in that we use the memory allocated by the compiler for the lock, +// rather than a pointer to it. +// +// On lin32, lin_32e, and win_32, the space allocated may be as small as 4 +// bytes, so we have to use a lock table for nested locks, and avoid accessing +// the depth_locked field for non-nested locks. +// +// Information normally available to the tools, such as lock location, +// lock usage (normal lock vs. critical section), etc. is not available with +// test and set locks. 
+// ---------------------------------------------------------------------------- + +struct kmp_base_tas_lock { + volatile kmp_int32 poll; // 0 => unlocked + // locked: (gtid+1) of owning thread + kmp_int32 depth_locked; // depth locked, for nested locks only +}; + +typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; + +union kmp_tas_lock { + kmp_base_tas_lock_t lk; + kmp_lock_pool_t pool; // make certain struct is large enough + double lk_align; // use worst case alignment + // no cache line padding +}; + +typedef union kmp_tas_lock kmp_tas_lock_t; + +// +// Static initializer for test and set lock variables. Usage: +// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock ); +// +#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } } + +extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck ); +extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ); + +extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck ); +extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ); + +#define KMP_LOCK_RELEASED 1 +#define KMP_LOCK_STILL_HELD 0 +#define KMP_LOCK_ACQUIRED_FIRST 1 +#define KMP_LOCK_ACQUIRED_NEXT 0 + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +// ---------------------------------------------------------------------------- +// futex locks. futex locks are only available on Linux* OS. +// +// Like non-nested test and set lock, non-nested futex locks use the memory +// allocated by the compiler for the lock, rather than a pointer to it. +// +// Information normally available to the tools, such as lock location, +// lock usage (normal lock vs. critical section), etc. is not available with +// test and set locks. With non-nested futex locks, the lock owner is not +// even available. +// ---------------------------------------------------------------------------- + +struct kmp_base_futex_lock { + volatile kmp_int32 poll; // 0 => unlocked + // 2*(gtid+1) of owning thread, 0 if unlocked + // locked: (gtid+1) of owning thread + kmp_int32 depth_locked; // depth locked, for nested locks only +}; + +typedef struct kmp_base_futex_lock kmp_base_futex_lock_t; + +union kmp_futex_lock { + kmp_base_futex_lock_t lk; + kmp_lock_pool_t pool; // make certain struct is large enough + double lk_align; // use worst case alignment + // no cache line padding +}; + +typedef union kmp_futex_lock kmp_futex_lock_t; + +// +// Static initializer for futex lock variables. 
Usage: +// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock ); +// +#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } } + +extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck ); +extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ); + +extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck ); +extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ); + +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + + +// ---------------------------------------------------------------------------- +// Ticket locks. +// ---------------------------------------------------------------------------- + +struct kmp_base_ticket_lock { + // `initialized' must be the first entry in the lock data structure! + volatile union kmp_ticket_lock * initialized; // points to the lock union if in initialized state + ident_t const * location; // Source code location of omp_init_lock(). + volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires + volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock + volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked, for nested locks only + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; + +typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t; + +union KMP_ALIGN_CACHE kmp_ticket_lock { + kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing. + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ]; +}; + +typedef union kmp_ticket_lock kmp_ticket_lock_t; + +// +// Static initializer for simple ticket lock variables. Usage: +// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock ); +// Note the macro argument. It is important to make var properly initialized. +// +#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } } + +extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck ); +extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ); + +extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck ); +extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ); + + +// ---------------------------------------------------------------------------- +// Queuing locks. 
+// ---------------------------------------------------------------------------- + +#if KMP_USE_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_info; + +typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t; + +#if KMP_DEBUG_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_statistics { + /* So we can get stats from locks that haven't been destroyed. */ + kmp_adaptive_lock_info_t * next; + kmp_adaptive_lock_info_t * prev; + + /* Other statistics */ + kmp_uint32 successfulSpeculations; + kmp_uint32 hardFailedSpeculations; + kmp_uint32 softFailedSpeculations; + kmp_uint32 nonSpeculativeAcquires; + kmp_uint32 nonSpeculativeAcquireAttempts; + kmp_uint32 lemmingYields; +}; + +typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t; + +extern void __kmp_print_speculative_stats(); +extern void __kmp_init_speculative_stats(); + +#endif // KMP_DEBUG_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_info +{ + /* Values used for adaptivity. + * Although these are accessed from multiple threads we don't access them atomically, + * because if we miss updates it probably doesn't matter much. (It just affects our + * decision about whether to try speculation on the lock). + */ + kmp_uint32 volatile badness; + kmp_uint32 volatile acquire_attempts; + /* Parameters of the lock. */ + kmp_uint32 max_badness; + kmp_uint32 max_soft_retries; + +#if KMP_DEBUG_ADAPTIVE_LOCKS + kmp_adaptive_lock_statistics_t volatile stats; +#endif +}; + +#endif // KMP_USE_ADAPTIVE_LOCKS + + +struct kmp_base_queuing_lock { + + // `initialized' must be the first entry in the lock data structure! + volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state. + + ident_t const * location; // Source code location of omp_init_lock(). + + KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned! + + volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty + // Must be no padding here since head/tail used in 8-byte CAS + volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty + // Decl order assumes little endian + // bakery-style lock + volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires + volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock + volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked, for nested locks only + + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; + +typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t; + +KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 ); + +union KMP_ALIGN_CACHE kmp_queuing_lock { + kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing. 
+ kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ]; +}; + +typedef union kmp_queuing_lock kmp_queuing_lock_t; + +extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ); +extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ); + +extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck ); +extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ); + +#if KMP_USE_ADAPTIVE_LOCKS + +// ---------------------------------------------------------------------------- +// Adaptive locks. +// ---------------------------------------------------------------------------- +struct kmp_base_adaptive_lock { + kmp_base_queuing_lock qlk; + KMP_ALIGN(CACHE_LINE) + kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock +}; + +typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t; + +union KMP_ALIGN_CACHE kmp_adaptive_lock { + kmp_base_adaptive_lock_t lk; + kmp_lock_pool_t pool; + double lk_align; + char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ]; +}; +typedef union kmp_adaptive_lock kmp_adaptive_lock_t; + +# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk) + +#endif // KMP_USE_ADAPTIVE_LOCKS + +// ---------------------------------------------------------------------------- +// DRDPA ticket locks. +// ---------------------------------------------------------------------------- + +struct kmp_base_drdpa_lock { + // + // All of the fields on the first cache line are only written when + // initializing or reconfiguring the lock. These are relatively rare + // operations, so data from the first cache line will usually stay + // resident in the cache of each thread trying to acquire the lock. + // + // initialized must be the first entry in the lock data structure! + // + KMP_ALIGN_CACHE + + volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state + ident_t const * location; // Source code location of omp_init_lock(). + volatile struct kmp_lock_poll { + kmp_uint64 poll; + } * volatile polls; + volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op + kmp_uint64 cleanup_ticket; // thread with cleanup ticket + volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls + kmp_uint32 num_polls; // must be power of 2 + + // + // next_ticket it needs to exist in a separate cache line, as it is + // invalidated every time a thread takes a new ticket. + // + KMP_ALIGN_CACHE + + volatile kmp_uint64 next_ticket; + + // + // now_serving is used to store our ticket value while we hold the lock. + // It has a slightly different meaning in the DRDPA ticket locks (where + // it is written by the acquiring thread) than it does in the simple + // ticket locks (where it is written by the releasing thread). 
+ // + // Since now_serving is only read an written in the critical section, + // it is non-volatile, but it needs to exist on a separate cache line, + // as it is invalidated at every lock acquire. + // + // Likewise, the vars used for nested locks (owner_id and depth_locked) + // are only written by the thread owning the lock, so they are put in + // this cache line. owner_id is read by other threads, so it must be + // declared volatile. + // + KMP_ALIGN_CACHE + + kmp_uint64 now_serving; // doesn't have to be volatile + volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; + +typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t; + +union KMP_ALIGN_CACHE kmp_drdpa_lock { + kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing. */ + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ]; +}; + +typedef union kmp_drdpa_lock kmp_drdpa_lock_t; + +extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ); +extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ); + +extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); +extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); +extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); + + +// ============================================================================ +// Lock purposes. +// ============================================================================ + + +// ---------------------------------------------------------------------------- +// Bootstrap locks. +// ---------------------------------------------------------------------------- + +// Bootstrap locks -- very few locks used at library initialization time. +// Bootstrap locks are currently implemented as ticket locks. +// They could also be implemented as test and set lock, but cannot be +// implemented with other lock kinds as they require gtids which are not +// available at initialization time. + +typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; + +#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) ) + +static inline int +__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE ); +} + +static inline int +__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + return __kmp_test_ticket_lock( lck, KMP_GTID_DNE ); +} + +static inline void +__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + __kmp_release_ticket_lock( lck, KMP_GTID_DNE ); +} + +static inline void +__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + __kmp_init_ticket_lock( lck ); +} + +static inline void +__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) +{ + __kmp_destroy_ticket_lock( lck ); +} + + +// ---------------------------------------------------------------------------- +// Internal RTL locks. 
+// ---------------------------------------------------------------------------- + +// +// Internal RTL locks are also implemented as ticket locks, for now. +// +// FIXME - We should go through and figure out which lock kind works best for +// each internal lock, and use the type declaration and function calls for +// that explicit lock kind (and get rid of this section). +// + +typedef kmp_ticket_lock_t kmp_lock_t; + +static inline int +__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid ) +{ + return __kmp_acquire_ticket_lock( lck, gtid ); +} + +static inline int +__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid ) +{ + return __kmp_test_ticket_lock( lck, gtid ); +} + +static inline void +__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid ) +{ + __kmp_release_ticket_lock( lck, gtid ); +} + +static inline void +__kmp_init_lock( kmp_lock_t *lck ) +{ + __kmp_init_ticket_lock( lck ); +} + +static inline void +__kmp_destroy_lock( kmp_lock_t *lck ) +{ + __kmp_destroy_ticket_lock( lck ); +} + + +// ---------------------------------------------------------------------------- +// User locks. +// ---------------------------------------------------------------------------- + +// +// Do not allocate objects of type union kmp_user_lock!!! +// This will waste space unless __kmp_user_lock_kind == lk_drdpa. +// Instead, check the value of __kmp_user_lock_kind and allocate objects of +// the type of the appropriate union member, and cast their addresses to +// kmp_user_lock_p. +// + +enum kmp_lock_kind { + lk_default = 0, + lk_tas, +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + lk_futex, +#endif +#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX + lk_hle, + lk_rtm, +#endif + lk_ticket, + lk_queuing, + lk_drdpa, +#if KMP_USE_ADAPTIVE_LOCKS + lk_adaptive +#endif // KMP_USE_ADAPTIVE_LOCKS +}; + +typedef enum kmp_lock_kind kmp_lock_kind_t; + +extern kmp_lock_kind_t __kmp_user_lock_kind; + +union kmp_user_lock { + kmp_tas_lock_t tas; +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + kmp_futex_lock_t futex; +#endif + kmp_ticket_lock_t ticket; + kmp_queuing_lock_t queuing; + kmp_drdpa_lock_t drdpa; +#if KMP_USE_ADAPTIVE_LOCKS + kmp_adaptive_lock_t adaptive; +#endif // KMP_USE_ADAPTIVE_LOCKS + kmp_lock_pool_t pool; +}; + +typedef union kmp_user_lock *kmp_user_lock_p; + +#if ! KMP_USE_DYNAMIC_LOCK + +extern size_t __kmp_base_user_lock_size; +extern size_t __kmp_user_lock_size; + +extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ); + +static inline kmp_int32 +__kmp_get_user_lock_owner( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL ); + return ( *__kmp_get_user_lock_owner_ )( lck ); +} + +extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +#define __kmp_acquire_user_lock_with_checks(lck,gtid) \ + if (__kmp_user_lock_kind == lk_tas) { \ + if ( __kmp_env_consistency_check ) { \ + char const * const func = "omp_set_lock"; \ + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \ + && lck->tas.lk.depth_locked != -1 ) { \ + KMP_FATAL( LockNestableUsedAsSimple, func ); \ + } \ + if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \ + KMP_FATAL( LockIsAlreadyOwned, func ); \ + } \ + } \ + if ( ( lck->tas.lk.poll != 0 ) || \ + ( ! 
KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE( lck ); \ + KMP_INIT_YIELD( spins ); \ + if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ + KMP_YIELD( TRUE ); \ + } else { \ + KMP_YIELD_SPIN( spins ); \ + } \ + while ( ( lck->tas.lk.poll != 0 ) || \ + ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ + if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ + KMP_YIELD( TRUE ); \ + } else { \ + KMP_YIELD_SPIN( spins ); \ + } \ + } \ + } \ + KMP_FSYNC_ACQUIRED( lck ); \ + } else { \ + KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \ + ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \ + } + +#else +static inline int +__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); + return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); +} +#endif + +extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +#include "kmp_i18n.h" /* AC: KMP_FATAL definition */ +extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ +static inline int +__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + if ( __kmp_user_lock_kind == lk_tas ) { + if ( __kmp_env_consistency_check ) { + char const * const func = "omp_test_lock"; + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) + && lck->tas.lk.depth_locked != -1 ) { + KMP_FATAL( LockNestableUsedAsSimple, func ); + } + } + return ( ( lck->tas.lk.poll == 0 ) && + KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); + } else { + KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); + return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); + } +} +#else +static inline int +__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); + return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); +} +#endif + +extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +static inline void +__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL ); + ( *__kmp_release_user_lock_with_checks_ ) ( lck, gtid ); +} + +extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ); + +static inline void +__kmp_init_user_lock_with_checks( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL ); + ( *__kmp_init_user_lock_with_checks_ )( lck ); +} + +// +// We need a non-checking version of destroy lock for when the RTL is +// doing the cleanup as it can't always tell if the lock is nested or not. 
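//
// For reference, the TAS fast path that the acquire/test wrappers above inline can be
// modelled by the following self-contained sketch. It is illustrative only: it uses C11
// atomics and sched_yield() in place of the runtime's KMP_COMPARE_AND_STORE_ACQ32 and
// KMP_YIELD machinery, and every name with the "example_" prefix is hypothetical, not
// part of this header.
//
// #include <stdatomic.h>
// #include <sched.h>
//
// typedef struct { _Atomic int poll; } example_tas_lock_t; // 0 => unlocked
//
// static int example_tas_try(example_tas_lock_t *l, int gtid) {
//     int unlocked = 0;
//     // Succeed only if the lock is free; store (gtid + 1) as the owner mark.
//     return atomic_compare_exchange_strong_explicit(&l->poll, &unlocked, gtid + 1,
//                                                    memory_order_acquire,
//                                                    memory_order_relaxed);
// }
//
// static void example_tas_acquire(example_tas_lock_t *l, int gtid) {
//     while (!example_tas_try(l, gtid)) {
//         sched_yield(); // stands in for the KMP_YIELD / KMP_YIELD_SPIN back-off
//     }
// }
//
// static void example_tas_release(example_tas_lock_t *l) {
//     atomic_store_explicit(&l->poll, 0, memory_order_release);
// }
//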
+// +extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ); + +static inline void +__kmp_destroy_user_lock( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL ); + ( *__kmp_destroy_user_lock_ )( lck ); +} + +extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ); + +static inline void +__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL ); + ( *__kmp_destroy_user_lock_with_checks_ )( lck ); +} + +extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) + +#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \ + if (__kmp_user_lock_kind == lk_tas) { \ + if ( __kmp_env_consistency_check ) { \ + char const * const func = "omp_set_nest_lock"; \ + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \ + && lck->tas.lk.depth_locked == -1 ) { \ + KMP_FATAL( LockSimpleUsedAsNestable, func ); \ + } \ + } \ + if ( lck->tas.lk.poll - 1 == gtid ) { \ + lck->tas.lk.depth_locked += 1; \ + *depth = KMP_LOCK_ACQUIRED_NEXT; \ + } else { \ + if ( ( lck->tas.lk.poll != 0 ) || \ + ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE( lck ); \ + KMP_INIT_YIELD( spins ); \ + if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ + KMP_YIELD( TRUE ); \ + } else { \ + KMP_YIELD_SPIN( spins ); \ + } \ + while ( ( lck->tas.lk.poll != 0 ) || \ + ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ + if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ + KMP_YIELD( TRUE ); \ + } else { \ + KMP_YIELD_SPIN( spins ); \ + } \ + } \ + } \ + lck->tas.lk.depth_locked = 1; \ + *depth = KMP_LOCK_ACQUIRED_FIRST; \ + } \ + KMP_FSYNC_ACQUIRED( lck ); \ + } else { \ + KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \ + *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \ + } + +#else +static inline void +__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth ) +{ + KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); + *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); +} +#endif + +extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +static inline int +__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + if ( __kmp_user_lock_kind == lk_tas ) { + int retval; + if ( __kmp_env_consistency_check ) { + char const * const func = "omp_test_nest_lock"; + if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) + && lck->tas.lk.depth_locked == -1 ) { + KMP_FATAL( LockSimpleUsedAsNestable, func ); + } + } + KMP_DEBUG_ASSERT( gtid >= 0 ); + if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ + return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ + } + retval = ( ( lck->tas.lk.poll == 0 ) && + KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); + if ( retval ) { + KMP_MB(); + lck->tas.lk.depth_locked = 1; + } + return retval; + } else { + KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); + return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); + } +} +#else +static inline int 
+__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); + return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); +} +#endif + +extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); + +static inline int +__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) +{ + KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL ); + return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid ); +} + +extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); + +static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL ); + ( *__kmp_init_nested_user_lock_with_checks_ )( lck ); +} + +extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); + +static inline void +__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck ) +{ + KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL ); + ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck ); +} + +// +// user lock functions which do not necessarily exist for all lock kinds. +// +// The "set" functions usually have wrapper routines that check for a NULL set +// function pointer and call it if non-NULL. +// +// In some cases, it makes sense to have a "get" wrapper function check for a +// NULL get function pointer and return NULL / invalid value / error code if +// the function pointer is NULL. +// +// In other cases, the calling code really should differentiate between an +// unimplemented function and one that is implemented but returning NULL / +// invalied value. If this is the case, no get function wrapper exists. +// + +extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ); + +// no set function; fields set durining local allocation + +extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ); + +static inline const ident_t * +__kmp_get_user_lock_location( kmp_user_lock_p lck ) +{ + if ( __kmp_get_user_lock_location_ != NULL ) { + return ( *__kmp_get_user_lock_location_ )( lck ); + } + else { + return NULL; + } +} + +extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ); + +static inline void +__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc ) +{ + if ( __kmp_set_user_lock_location_ != NULL ) { + ( *__kmp_set_user_lock_location_ )( lck, loc ); + } +} + +extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ); + +extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ); + +static inline void +__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags ) +{ + if ( __kmp_set_user_lock_flags_ != NULL ) { + ( *__kmp_set_user_lock_flags_ )( lck, flags ); + } +} + +// +// The fuction which sets up all of the vtbl pointers for kmp_user_lock_t. +// +extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ); + +// +// Macros for binding user lock functions. 
+// +#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \ + __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ + __kmp_acquire##nest##kind##_##suffix; \ + __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ + __kmp_release##nest##kind##_##suffix; \ + __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ + __kmp_test##nest##kind##_##suffix; \ + __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ + __kmp_init##nest##kind##_##suffix; \ + __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ + __kmp_destroy##nest##kind##_##suffix; \ +} + +#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock) +#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks) +#define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock) +#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks) + +// ---------------------------------------------------------------------------- +// User lock table & lock allocation +// ---------------------------------------------------------------------------- + +/* + On 64-bit Linux* OS (and OS X*) GNU compiler allocates only 4 bytems memory for lock variable, which + is not enough to store a pointer, so we have to use lock indexes instead of pointers and + maintain lock table to map indexes to pointers. + + + Note: The first element of the table is not a pointer to lock! It is a pointer to previously + allocated table (or NULL if it is the first table). + + Usage: + + if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE + Lock table is fully utilized. User locks are indexes, so table is + used on user lock operation. + Note: it may be the case (lin_32) that we don't need to use a lock + table for regular locks, but do need the table for nested locks. + } + else { + Lock table initialized but not actually used. + } +*/ + +struct kmp_lock_table { + kmp_lock_index_t used; // Number of used elements + kmp_lock_index_t allocated; // Number of allocated elements + kmp_user_lock_p * table; // Lock table. +}; + +typedef struct kmp_lock_table kmp_lock_table_t; + +extern kmp_lock_table_t __kmp_user_lock_table; +extern kmp_user_lock_p __kmp_lock_pool; + +struct kmp_block_of_locks { + struct kmp_block_of_locks * next_block; + void * locks; +}; + +typedef struct kmp_block_of_locks kmp_block_of_locks_t; + +extern kmp_block_of_locks_t *__kmp_lock_blocks; +extern int __kmp_num_locks_in_block; + +extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags ); +extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ); +extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func ); +extern void __kmp_cleanup_user_locks(); + +#define KMP_CHECK_USER_LOCK_INIT() \ + { \ + if ( ! TCR_4( __kmp_init_user_locks ) ) { \ + __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \ + if ( ! TCR_4( __kmp_init_user_locks ) ) { \ + TCW_4( __kmp_init_user_locks, TRUE ); \ + } \ + __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \ + } \ + } + +#endif // KMP_USE_DYNAMIC_LOCK + +#undef KMP_PAD +#undef KMP_GTID_DNE + +#if KMP_USE_DYNAMIC_LOCK + +// +// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current +// compatibility. 
The essential functionality of this new code is dynamic dispatch, but it also
+// implements (or enables the implementation of) hinted user locks and critical sections, which
+// will be part of OMP 4.1 soon.
+//
+// The lock type is decided at creation time (i.e., lock initialization), and every subsequent
+// lock function call on the created lock object requires extracting the type and dispatching
+// through a jump table indexed by that type. This type information is stored in two different
+// ways depending on the size of the lock object, and we differentiate lock kinds by this size
+// requirement - direct and indirect locks.
+//
+// Direct locks:
+// A direct lock object fits into the space the compiler creates for an omp_lock_t object, and
+// TAS and futex locks fall into this category. The low byte of the lock object stores the lock
+// type, so the appropriate bit operations are needed to reach the data that is meaningful to
+// the lock algorithms. Also, to differentiate a direct lock from an indirect lock, 1 is written
+// to the LSB of the lock object. The newly introduced "hle" lock is also a direct lock.
+//
+// Indirect locks:
+// An indirect lock object requires more space than the compiler-generated space and must be
+// allocated from the heap. Depending on the size of the compiler-generated space for the lock
+// (i.e., the size of omp_lock_t), the omp_lock_t object stores either the address of the
+// heap-allocated indirect lock (when a void * fits in the object) or an index into the indirect
+// lock table entry that holds the address. Ticket, queuing, DRDPA, and adaptive locks fall into
+// this category, and the newly introduced "rtm" lock is also an indirect lock, implemented on
+// top of the queuing lock. When the omp_lock_t object holds an index (not a lock address), 0 is
+// written to the LSB to differentiate the lock from a direct lock, and the remaining bits are
+// the actual index into the indirect lock table.
+//
+
+#include <stdint.h> // for uintptr_t
+
+// Shortcuts
+#define KMP_USE_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
+#define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
+#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
+
+// List of lock definitions; all nested locks are indirect lock types.
+// The hle lock is an xchg lock prefixed with XACQUIRE/XRELEASE. 
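//
// Before the per-kind lock lists below, here is a minimal illustration of the LSB-based
// encoding described above. It is an illustrative sketch only: the "example_" names are
// hypothetical and not part of the runtime, which instead uses the KMP_EXTRACT_* and
// KMP_*_LOCK_FUNC macros defined further below.
//
// #include <stdint.h>
// #include <stdio.h>
//
// static void example_classify(uint32_t lock_word) {
//     if (lock_word & 1) {
//         // Direct lock: the low byte (LSB included) is the lock tag; a free direct
//         // lock holds exactly its (odd) tag value.
//         printf("direct lock, tag = %u\n", (unsigned)(lock_word & 0xffu));
//     } else {
//         // Indirect lock: the LSB is 0. When omp_lock_t is too small to hold a
//         // pointer, the remaining bits are an index into the indirect lock table,
//         // so table entry 5 is stored as (5 << 1) == 10.
//         printf("indirect lock, table index = %u\n", (unsigned)(lock_word >> 1));
//     }
// }
//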
+#if KMP_USE_TSX +# if KMP_USE_FUTEX +# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) +# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ + m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# else +# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) +# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ + m(nested_tas, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# endif // KMP_USE_FUTEX +# define KMP_LAST_D_LOCK lockseq_hle +#else +# if KMP_USE_FUTEX +# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) +# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ + m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# define KMP_LAST_D_LOCK lockseq_futex +# else +# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) +# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ + m(nested_tas, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# define KMP_LAST_D_LOCK lockseq_tas +# endif // KMP_USE_FUTEX +#endif // KMP_USE_TSX + +// Information used in dynamic dispatch +#define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks +#define KMP_FIRST_D_LOCK lockseq_tas +#define KMP_FIRST_I_LOCK lockseq_ticket +#define KMP_LAST_I_LOCK lockseq_nested_drdpa +#define KMP_NUM_I_LOCKS (locktag_nested_drdpa+1) // number of indirect lock types + +// Base type for dynamic locks. +typedef kmp_uint32 kmp_dyna_lock_t; + +// Lock sequence that enumerates all lock kinds. +// Always make this enumeration consistent with kmp_lockseq_t in the include directory. +typedef enum { + lockseq_indirect = 0, +#define expand_seq(l,a) lockseq_##l, + KMP_FOREACH_D_LOCK(expand_seq, 0) + KMP_FOREACH_I_LOCK(expand_seq, 0) +#undef expand_seq +} kmp_dyna_lockseq_t; + +// Enumerates indirect lock tags. +typedef enum { +#define expand_tag(l,a) locktag_##l, + KMP_FOREACH_I_LOCK(expand_tag, 0) +#undef expand_tag +} kmp_indirect_locktag_t; + +// Utility macros that extract information from lock sequences. +#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK) +#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK) +#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK) +#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1) + +// Enumerates direct lock tags starting from indirect tag. +typedef enum { +#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l), + KMP_FOREACH_D_LOCK(expand_tag, 0) +#undef expand_tag +} kmp_direct_locktag_t; + +// Indirect lock type +typedef struct { + kmp_user_lock_p lock; + kmp_indirect_locktag_t type; +} kmp_indirect_lock_t; + +// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking. +extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); +extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *); +extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); + +// Function tables for indirect locks. Set/unset/test differentiate functions with/withuot consistency checking. 
+extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
+extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
+extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
+extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
+extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
+
+// Extracts the direct lock tag from a user lock pointer
+#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1))
+
+// Extracts the indirect lock index from a user lock pointer
+#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
+
+// Returns the function pointer to the direct lock function for l (kmp_dyna_lock_t *) and op (operation type).
+#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
+
+// Returns the function pointer to the indirect lock function for l (kmp_indirect_lock_t *) and op (operation type).
+#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
+
+// Initializes a direct lock with the given lock pointer and lock sequence.
+#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
+
+// Initializes an indirect lock with the given lock pointer and lock sequence.
+#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
+
+// Returns the "free" lock value for the given lock type.
+#define KMP_LOCK_FREE(type) (locktag_##type)
+
+// Returns the "busy" lock value for the given lock type.
+#define KMP_LOCK_BUSY(v, type) ((v)<<KMP_LOCK_SHIFT | locktag_##type)
+
+// Returns the lock value after removing (shifting off) the lock tag.
+#define KMP_LOCK_STRIP(v) ((v)>>KMP_LOCK_SHIFT)
+
+// Initializes global states and data structures for managing dynamic user locks.
+extern void __kmp_init_dynamic_user_locks();
+
+// Allocates and returns an indirect lock with the given indirect lock tag.
+extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
+
+// Cleans up global states and data structures for managing dynamic user locks.
+extern void __kmp_cleanup_indirect_user_locks();
+
+// Default user lock sequence when not using hinted locks.
+extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
+
+// Jump table for "set lock location", available only for indirect locks.
+extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
+#define KMP_SET_I_LOCK_LOCATION(lck, loc) { \
+ if (__kmp_indirect_set_location[(lck)->type] != NULL) \
+ __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
+}
+
+// Jump table for "set lock flags", available only for indirect locks.
+extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
+#define KMP_SET_I_LOCK_FLAGS(lck, flag) { \
+ if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
+ __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
+}
+
+// Jump table for "get lock location", available only for indirect locks.
+extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
+#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \
+ ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
+ : NULL )
+
+// Jump table for "get lock flags", available only for indirect locks.
+extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
+#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \
+ ? 
__kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
+ : NULL )
+
+#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together
+
+// Lock table for indirect locks.
+typedef struct kmp_indirect_lock_table {
+ kmp_indirect_lock_t **table; // blocks of indirect locks allocated
+ kmp_lock_index_t size; // size of the indirect lock table
+ kmp_lock_index_t next; // index to the next lock to be allocated
+} kmp_indirect_lock_table_t;
+
+extern kmp_indirect_lock_table_t __kmp_i_lock_table;
+
+// Returns the indirect lock associated with the given index.
+#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK)
+
+// Number of locks in a lock block, which is fixed to "1" for now.
+// TODO: There is no lock block implementation yet. If we add one, we will need to manage a lock
+// block data structure for each indirect lock type.
+extern int __kmp_num_locks_in_block;
+
+// Fast lock table lookup without consistency checking
+#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \
+ ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
+ : *((kmp_indirect_lock_t **)(l)) )
+
+// Used once in kmp_error.c
+extern kmp_int32
+__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+# define KMP_LOCK_BUSY(v, type) (v)
+# define KMP_LOCK_FREE(type) 0
+# define KMP_LOCK_STRIP(v) (v)
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif /* KMP_LOCK_H */
+
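//
// To make the dynamic-lock arithmetic above concrete, the following self-contained sketch
// mirrors KMP_LOCK_BUSY / KMP_LOCK_FREE / KMP_LOCK_STRIP and the chunked KMP_GET_I_LOCK
// lookup with plain functions. It is illustrative only: the "example_" names are
// hypothetical, the tag value 5 is arbitrary, and the authoritative definitions are the
// macros in the header above (which assume KMP_LOCK_SHIFT == 8 and KMP_I_LOCK_CHUNK == 1024).
//
// #include <stdint.h>
//
// enum { EXAMPLE_LOCK_SHIFT = 8, EXAMPLE_I_LOCK_CHUNK = 1024 };
//
// // KMP_LOCK_BUSY(v, type): shift the payload up and keep the tag in the low bits,
// // so a direct lock word always carries its type.
// static uint32_t example_lock_busy(uint32_t v, uint32_t tag) {
//     return (v << EXAMPLE_LOCK_SHIFT) | tag;
// }
//
// // KMP_LOCK_FREE(type): a free direct lock stores just the tag.
// static uint32_t example_lock_free(uint32_t tag) { return tag; }
//
// // KMP_LOCK_STRIP(v): drop the tag to recover the payload (e.g. gtid + 1).
// static uint32_t example_lock_strip(uint32_t v) { return v >> EXAMPLE_LOCK_SHIFT; }
//
// // KMP_GET_I_LOCK(index): the indirect lock table is an array of pointers to
// // fixed-size blocks, so a lookup is one division and one modulo.
// typedef struct example_ilock { int dummy; } example_ilock_t;
// static example_ilock_t *example_get_i_lock(example_ilock_t **table, uint32_t index) {
//     return table[index / EXAMPLE_I_LOCK_CHUNK] + (index % EXAMPLE_I_LOCK_CHUNK);
// }
//
// // Usage: for gtid 3 and a hypothetical tag of 5, example_lock_busy(3 + 1, 5) is
// // (4 << 8) | 5 == 1029, and example_lock_strip(1029) recovers 4.
//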