contrib/libs/cxxsupp/libcxxcuda11/src/atomic.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <__config>
#ifndef _LIBCPP_HAS_NO_THREADS

#include <__thread/timed_backoff_policy.h>
#include <atomic>
#include <climits>
#include <functional>
#include <thread>

#include "include/apple_availability.h"

#ifdef __linux__

#include <unistd.h>
#include <linux/futex.h>
#include <sys/syscall.h>

// libc++ uses SYS_futex as a universal syscall name. However, on 32 bit architectures
// with a 64 bit time_t, we need to specify SYS_futex_time64.
#if !defined(SYS_futex) && defined(SYS_futex_time64)
# define SYS_futex SYS_futex_time64
#endif

#elif defined(__FreeBSD__)

#include <sys/types.h>
#include <sys/umtx.h>

#else // <- Add other operating systems here

// Baseline needs no new headers

#endif

_LIBCPP_BEGIN_NAMESPACE_STD

#ifdef __linux__

// Linux wait primitive: issues a FUTEX_WAIT_PRIVATE futex syscall on __ptr,
// handing the kernel __val as the value to compare against and a timeout
// argument of { 2, 0 }. The result of the syscall is ignored, so the caller
// cannot tell a wake-up from a timeout or a value mismatch.
static void __libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr,
                                              __cxx_contention_t __val)
{
    // Timeout passed to the futex call (tv_sec = 2, tv_nsec = 0).
    static constexpr timespec __two_seconds = { 2, 0 };
    timespec const* const __timeout_ptr = &__two_seconds;

    syscall(SYS_futex, __ptr, FUTEX_WAIT_PRIVATE, __val, __timeout_ptr, 0, 0);
}

// Linux wake primitive: issues a FUTEX_WAKE_PRIVATE futex syscall on __ptr.
// The count argument is 1 when __notify_one is set and INT_MAX otherwise.
// The syscall's result is ignored.
static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr,
                                              bool __notify_one)
{
    int const __wake_count = __notify_one ? 1 : INT_MAX;

    syscall(SYS_futex, __ptr, FUTEX_WAKE_PRIVATE, __wake_count, 0, 0, 0);
}

#elif defined(__APPLE__) && defined(_LIBCPP_USE_ULOCK)

// Local declarations of the __ulock_wait / __ulock_wake entry points used by
// the two platform helpers that follow in this branch.
// NOTE(review): presumably these are declared here because no public header
// provides them — confirm against the platform SDK.
extern "C" int __ulock_wait(uint32_t operation, void *addr, uint64_t value,
                            uint32_t timeout); /* timeout is specified in microseconds */
extern "C" int __ulock_wake(uint32_t operation, void *addr, uint64_t wake_value);

// Operation code passed as the `operation` argument of both calls.
#define UL_COMPARE_AND_WAIT 1
// Flag OR-ed into the wake operation when all waiters should be woken.
#define ULF_WAKE_ALL        0x00000100

// Apple (ulock) wait primitive: calls __ulock_wait with UL_COMPARE_AND_WAIT on
// the address behind __ptr, passing __val as the comparison value and 0 as the
// timeout argument. The return value is ignored.
// NOTE(review): confirm that the operand width implied by UL_COMPARE_AND_WAIT
// matches sizeof(__cxx_contention_t) on this platform.
static void __libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr,
                                              __cxx_contention_t __val)
{
    // __ulock_wait takes a plain void*, so the cv-qualifiers have to be stripped.
    __cxx_atomic_contention_t* const __addr = const_cast<__cxx_atomic_contention_t*>(__ptr);

    __ulock_wait(UL_COMPARE_AND_WAIT, __addr, __val, 0);
}

// Apple (ulock) wake primitive: calls __ulock_wake on the address behind __ptr.
// The operation is UL_COMPARE_AND_WAIT, with ULF_WAKE_ALL OR-ed in unless
// __notify_one is set. The wake value is 0 and the return value is ignored.
static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr,
                                              bool __notify_one)
{
    uint32_t const __operation = UL_COMPARE_AND_WAIT | (__notify_one ? 0 : ULF_WAKE_ALL);
    // __ulock_wake takes a plain void*, so the cv-qualifiers have to be stripped.
    __cxx_atomic_contention_t* const __addr = const_cast<__cxx_atomic_contention_t*>(__ptr);

    __ulock_wake(__operation, __addr, 0);
}

#elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
/*
 * Since __cxx_contention_t is int64_t even on 32bit FreeBSD
 * platforms, we have to use umtx ops that work on the long type, and
 * limit its use to architectures where long and int64_t are synonyms.
 */

// FreeBSD wait primitive: calls _umtx_op with UMTX_OP_WAIT on the address
// behind __ptr, passing __val as the value argument and no extra arguments.
// The return value is ignored.
static void __libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr,
                                              __cxx_contention_t __val)
{
    // _umtx_op wants an unqualified object pointer.
    __cxx_atomic_contention_t* const __obj = const_cast<__cxx_atomic_contention_t*>(__ptr);

    _umtx_op(__obj, UMTX_OP_WAIT, __val, NULL, NULL);
}

// FreeBSD wake primitive: calls _umtx_op with UMTX_OP_WAKE on the address
// behind __ptr. The value argument is 1 when __notify_one is set and INT_MAX
// otherwise. The return value is ignored.
static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr,
                                              bool __notify_one)
{
    // _umtx_op wants an unqualified object pointer.
    __cxx_atomic_contention_t* const __obj = const_cast<__cxx_atomic_contention_t*>(__ptr);
    int const __wake_count = __notify_one ? 1 : INT_MAX;

    _umtx_op(__obj, UMTX_OP_WAKE, __wake_count, NULL, NULL);
}

#else // <- Add other operating systems here

// Baseline is just a timed backoff

// Fallback wait primitive for platforms without a native wait-on-address call.
// Hands __libcpp_thread_poll_with_backoff a predicate together with the timed
// backoff policy; the predicate reports true once a relaxed load of *__ptr no
// longer compares equal to __val.
static void __libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr,
                                              __cxx_contention_t __val)
{
    auto __value_changed = [=]() -> bool {
        __cxx_contention_t const __current = __cxx_atomic_load(__ptr, memory_order_relaxed);
        return !__cxx_nonatomic_compare_equal(__current, __val);
    };

    __libcpp_thread_poll_with_backoff(__value_changed, __libcpp_timed_backoff_policy());
}

// Fallback wake primitive: intentionally does nothing. The fallback wait above
// polls the value itself, so there is no platform call to make here.
static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* /*__ptr*/,
                                              bool /*__notify_one*/)
{
}

#endif // __linux__

// Number of entries in the contention table (1 << 8 == 256).
// __libcpp_contention_state reduces the hash with `& (size - 1)`, so keeping
// this a power of two is what lets every slot be reached.
static constexpr size_t __libcpp_contention_table_size = (1 << 8);  /* < there's no magic in this number */

// One slot of the contention table. Each slot is aligned to 64 bytes with the
// aim of keeping neighbouring slots from sharing storage that threads contend on.
struct alignas(64) /* aim to avoid false sharing */ __libcpp_contention_table_entry
{
    // Counter bumped by __libcpp_contention_wait around the platform wait and
    // inspected by __libcpp_contention_notify before it calls the platform.
    __cxx_atomic_contention_t __contention_state;

    // Value that __libcpp_atomic_notify increments and that the
    // void-pointer overloads of monitor/wait observe and sleep on.
    __cxx_atomic_contention_t __platform_state;

    // Both members start out at zero.
    constexpr __libcpp_contention_table_entry()
        : __contention_state(0),
          __platform_state(0)
    {
    }
};

// The contention table itself. Any two addresses that hash to the same slot
// share that slot's entry.
static __libcpp_contention_table_entry __libcpp_contention_table[ __libcpp_contention_table_size ];

// Hash functor used by __libcpp_contention_state to turn an address into a slot index.
static hash<void const volatile*> __libcpp_contention_hasher;

// Maps an address to its slot in the contention table: hashes the pointer and
// keeps the low bits selected by (table size - 1). Returns a pointer to that
// slot and never returns null.
static __libcpp_contention_table_entry* __libcpp_contention_state(void const volatile * p)
{
    size_t const __mask = __libcpp_contention_table_size - 1;
    size_t const __slot = __libcpp_contention_hasher(p) & __mask;

    return &__libcpp_contention_table[__slot];
}

/* Given an atomic to track contention and an atomic to actually wait on, which may be
   the same atomic, we try to detect contention to avoid spuriously calling the platform. */

// Wakes waiters on __platform_state, but only when *__contention_state reads
// non-zero. That counter is raised by __libcpp_contention_wait for the duration
// of a platform wait, so a zero reading lets us skip the platform call.
//   __contention_state  counter consulted to decide whether to wake
//   __platform_state    address handed to the platform wake primitive
//   __notify_one        forwarded unchanged to the platform wake primitive
static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __contention_state,
                                       __cxx_atomic_contention_t const volatile* __platform_state,
                                       bool __notify_one)
{
    // The counter is only read here, never modified.
    if (0 == __cxx_atomic_load(__contention_state, memory_order_seq_cst))
    {
        return;
    }

    __libcpp_platform_wake_by_address(__platform_state, __notify_one);
}
// Returns the current value of *__platform_state, read with acquire ordering.
// The first (contention-state) argument is accepted but not used.
static __cxx_contention_t __libcpp_contention_monitor_for_wait(__cxx_atomic_contention_t volatile* /*__contention_state*/,
                                                               __cxx_atomic_contention_t const volatile* __platform_state)
{
    // This is the value that will be monitored.
    __cxx_contention_t const __observed = __cxx_atomic_load(__platform_state, memory_order_acquire);
    return __observed;
}
// Performs one platform wait on __platform_state against __old_value, with
// *__contention_state incremented for the duration of the wait.
//   __contention_state  counter raised (seq_cst) before and lowered (release) after the wait
//   __platform_state    address handed to the platform wait primitive
//   __old_value         value handed to the platform wait primitive for comparison
static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __contention_state,
                                     __cxx_atomic_contention_t const volatile* __platform_state,
                                     __cxx_contention_t __old_value)
{
    __cxx_contention_t const __one = 1;

    // Announce this waiter before going to sleep.
    __cxx_atomic_fetch_add(__contention_state, __one, memory_order_seq_cst);

    // Sleep for as long as the monitored value has not changed.
    __libcpp_platform_wait_on_address(__platform_state, __old_value);

    // Withdraw the announcement once the platform wait has returned.
    __cxx_atomic_fetch_sub(__contention_state, __one, memory_order_release);
}

/* When the incoming atomic is the wrong size for the platform wait size, we need to
   launder the value sequence through an atomic from our table. */

static void __libcpp_atomic_notify(void const volatile* __location)
{
    auto const __entry = __libcpp_contention_state(__location);
    // The value sequence laundering happens on the next line below.
    __cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_release);
    __libcpp_contention_notify(&__entry->__contention_state,
                               &__entry->__platform_state,
                               false /* when laundering, we can't handle notify_one */);
}
// Exported notify_one for an arbitrary address. Forwards to
// __libcpp_atomic_notify, which does not distinguish notify_one from notify_all.
_LIBCPP_EXPORTED_FROM_ABI
void __cxx_atomic_notify_one(void const volatile* __location)
{
    __libcpp_atomic_notify(__location);
}
// Exported notify_all for an arbitrary address. Forwards to __libcpp_atomic_notify.
_LIBCPP_EXPORTED_FROM_ABI
void __cxx_atomic_notify_all(void const volatile* __location)
{
    __libcpp_atomic_notify(__location);
}
// Exported monitor for an arbitrary address. Returns the value to pass to a
// later __libcpp_atomic_wait on the same address; it is read from the
// __platform_state of the table entry that __location maps to.
_LIBCPP_EXPORTED_FROM_ABI
__cxx_contention_t __libcpp_atomic_monitor(void const volatile* __location)
{
    __libcpp_contention_table_entry* const __entry = __libcpp_contention_state(__location);
    __cxx_atomic_contention_t* const __contention = &__entry->__contention_state;
    __cxx_atomic_contention_t* const __platform = &__entry->__platform_state;

    return __libcpp_contention_monitor_for_wait(__contention, __platform);
}
// Exported wait for an arbitrary address. Performs one contention-tracked
// platform wait on the __platform_state of the table entry that __location
// maps to, comparing against __old_value.
_LIBCPP_EXPORTED_FROM_ABI
void __libcpp_atomic_wait(void const volatile* __location, __cxx_contention_t __old_value)
{
    __libcpp_contention_table_entry* const __entry = __libcpp_contention_state(__location);
    __cxx_atomic_contention_t* const __contention = &__entry->__contention_state;
    __cxx_atomic_contention_t* const __platform = &__entry->__platform_state;

    __libcpp_contention_wait(__contention, __platform, __old_value);
}

/* When the incoming atomic happens to be the platform wait size, we still need to use the
   table for the contention detection, but we can use the atomic directly for the wait. */

// Exported notify_one for an atomic of the platform wait type. The table entry
// supplies only the contention counter; the wake targets __location itself,
// with notify_one == true.
_LIBCPP_EXPORTED_FROM_ABI
void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile* __location)
{
    __libcpp_contention_table_entry* const __entry = __libcpp_contention_state(__location);

    __libcpp_contention_notify(&__entry->__contention_state, __location, true);
}
// Exported notify_all for an atomic of the platform wait type. The table entry
// supplies only the contention counter; the wake targets __location itself,
// with notify_one == false.
_LIBCPP_EXPORTED_FROM_ABI
void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location)
{
    __libcpp_contention_table_entry* const __entry = __libcpp_contention_state(__location);

    __libcpp_contention_notify(&__entry->__contention_state, __location, false);
}
// Exported monitor for an atomic of the platform wait type. Returns the value
// currently stored in *__location, which is what a later wait compares against.
_LIBCPP_EXPORTED_FROM_ABI
__cxx_contention_t __libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location)
{
    __libcpp_contention_table_entry* const __entry = __libcpp_contention_state(__location);

    return __libcpp_contention_monitor_for_wait(&__entry->__contention_state, __location);
}
// Exported wait for an atomic of the platform wait type. Performs one
// contention-tracked platform wait directly on __location against __old_value;
// the table entry supplies only the contention counter.
_LIBCPP_EXPORTED_FROM_ABI
void __libcpp_atomic_wait(__cxx_atomic_contention_t const volatile* __location, __cxx_contention_t __old_value)
{
    __libcpp_contention_table_entry* const __entry = __libcpp_contention_state(__location);

    __libcpp_contention_wait(&__entry->__contention_state, __location, __old_value);
}

_LIBCPP_END_NAMESPACE_STD

#endif //_LIBCPP_HAS_NO_THREADS