1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
|
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !wasm
package runtime
import (
"internal/goarch"
"internal/runtime/atomic"
"internal/runtime/gc"
"unsafe"
)
// This implementation depends on OS-specific implementations of
//
// func semacreate(mp *m)
// Create a semaphore for mp, if it does not already have one.
//
// func semasleep(ns int64) int32
// If ns < 0, acquire m's semaphore and return 0.
// If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
// Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
//
// func semawakeup(mp *m)
// Wake up mp, which is or will soon be sleeping on its semaphore.
// The mutex state consists of four flags and a pointer. The flag at bit 0,
// mutexLocked, represents the lock itself. Bit 1, mutexSleeping, is a hint that
// the pointer is non-nil. The fast paths for locking and unlocking the mutex
// are based on atomic 8-bit swap operations on the low byte; bits 2 through 7
// are unused.
//
// Bit 8, mutexSpinning, is a try-lock that grants a waiting M permission to
// spin on the state word. Most other Ms must attempt to spend their time
// sleeping to reduce traffic on the cache line. This is the "spin bit" for
// which the implementation is named. (The anti-starvation mechanism also grants
// temporary permission for an M to spin.)
//
// Bit 9, mutexStackLocked, is a try-lock that grants an unlocking M permission
// to inspect the list of waiting Ms and to pop an M off of that stack.
//
// The upper bits hold a (partial) pointer to the M that most recently went to
// sleep. The sleeping Ms form a stack linked by their mWaitList.next fields.
// Because the fast paths use an 8-bit swap on the low byte of the state word,
// we'll need to reconstruct the full M pointer from the bits we have. Most Ms
// are allocated on the heap, and have a known alignment and base offset. (The
// offset is due to mallocgc's allocation headers.) The main program thread uses
// a static M value, m0. We check for m0 specifically and add a known offset
// otherwise.
const (
active_spin = 4 // referenced in proc.go for sync.Mutex implementation
active_spin_cnt = 30 // referenced in proc.go for sync.Mutex implementation
)
const (
mutexLocked = 0x001
mutexSleeping = 0x002
mutexSpinning = 0x100
mutexStackLocked = 0x200
mutexMMask = 0x3FF
mutexMOffset = gc.MallocHeaderSize // alignment of heap-allocated Ms (those other than m0)
mutexActiveSpinCount = 4
mutexActiveSpinSize = 30
mutexPassiveSpinCount = 1
mutexTailWakePeriod = 16
)
//go:nosplit
func key8(p *uintptr) *uint8 {
if goarch.BigEndian {
return &(*[8]uint8)(unsafe.Pointer(p))[goarch.PtrSize/1-1]
}
return &(*[8]uint8)(unsafe.Pointer(p))[0]
}
// mWaitList is part of the M struct, and holds the list of Ms that are waiting
// for a particular runtime.mutex.
//
// When an M is unable to immediately obtain a lock, it adds itself to the list
// of Ms waiting for the lock. It does that via this struct's next field,
// forming a singly-linked list with the mutex's key field pointing to the head
// of the list.
type mWaitList struct {
next muintptr // next m waiting for lock
startTicks int64 // when this m started waiting for the current lock holder, in cputicks
}
// lockVerifyMSize confirms that we can recreate the low bits of the M pointer.
func lockVerifyMSize() {
size := roundupsize(unsafe.Sizeof(mPadded{}), false) + gc.MallocHeaderSize
if size&mutexMMask != 0 {
print("M structure uses sizeclass ", size, "/", hex(size), " bytes; ",
"incompatible with mutex flag mask ", hex(mutexMMask), "\n")
throw("runtime.m memory alignment too small for spinbit mutex")
}
}
// mutexWaitListHead recovers a full muintptr that was missing its low bits.
// With the exception of the static m0 value, it requires allocating runtime.m
// values in a size class with a particular minimum alignment. The 2048-byte
// size class allows recovering the full muintptr value even after overwriting
// the low 11 bits with flags. We can use those 11 bits as 3 flags and an
// atomically-swapped byte.
//
//go:nosplit
func mutexWaitListHead(v uintptr) muintptr {
if highBits := v &^ mutexMMask; highBits == 0 {
return 0
} else if m0bits := muintptr(unsafe.Pointer(&m0)); highBits == uintptr(m0bits)&^mutexMMask {
return m0bits
} else {
return muintptr(highBits + mutexMOffset)
}
}
// mutexPreferLowLatency reports if this mutex prefers low latency at the risk
// of performance collapse. If so, we can allow all waiting threads to spin on
// the state word rather than go to sleep.
//
// TODO: We could have the waiting Ms each spin on their own private cache line,
// especially if we can put a bound on the on-CPU time that would consume.
//
// TODO: If there's a small set of mutex values with special requirements, they
// could make use of a more specialized lock2/unlock2 implementation. Otherwise,
// we're constrained to what we can fit within a single uintptr with no
// additional storage on the M for each lock held.
//
//go:nosplit
func mutexPreferLowLatency(l *mutex) bool {
switch l {
default:
return false
case &sched.lock:
// We often expect sched.lock to pass quickly between Ms in a way that
// each M has unique work to do: for instance when we stop-the-world
// (bringing each P to idle) or add new netpoller-triggered work to the
// global run queue.
return true
}
}
func mutexContended(l *mutex) bool {
return atomic.Loaduintptr(&l.key)&^mutexMMask != 0
}
func lock(l *mutex) {
lockWithRank(l, getLockRank(l))
}
func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
throw("runtime·lock: lock count")
}
gp.m.locks++
k8 := key8(&l.key)
// Speculative grab for lock.
v8 := atomic.Xchg8(k8, mutexLocked)
if v8&mutexLocked == 0 {
if v8&mutexSleeping != 0 {
atomic.Or8(k8, mutexSleeping)
}
return
}
semacreate(gp.m)
var startTime int64
// On uniprocessors, no point spinning.
// On multiprocessors, spin for mutexActiveSpinCount attempts.
spin := 0
if numCPUStartup > 1 {
spin = mutexActiveSpinCount
}
var weSpin, atTail, haveTimers bool
v := atomic.Loaduintptr(&l.key)
tryAcquire:
for i := 0; ; i++ {
if v&mutexLocked == 0 {
if weSpin {
next := (v &^ mutexSpinning) | mutexSleeping | mutexLocked
if next&^mutexMMask == 0 {
// The fast-path Xchg8 may have cleared mutexSleeping. Fix
// the hint so unlock2 knows when to use its slow path.
next = next &^ mutexSleeping
}
if atomic.Casuintptr(&l.key, v, next) {
gp.m.mLockProfile.end(startTime)
return
}
} else {
prev8 := atomic.Xchg8(k8, mutexLocked|mutexSleeping)
if prev8&mutexLocked == 0 {
gp.m.mLockProfile.end(startTime)
return
}
}
v = atomic.Loaduintptr(&l.key)
continue tryAcquire
}
if !weSpin && v&mutexSpinning == 0 && atomic.Casuintptr(&l.key, v, v|mutexSpinning) {
v |= mutexSpinning
weSpin = true
}
if weSpin || atTail || mutexPreferLowLatency(l) {
if i < spin {
procyield(mutexActiveSpinSize)
v = atomic.Loaduintptr(&l.key)
continue tryAcquire
} else if i < spin+mutexPassiveSpinCount {
osyield() // TODO: Consider removing this step. See https://go.dev/issue/69268.
v = atomic.Loaduintptr(&l.key)
continue tryAcquire
}
}
// Go to sleep
if v&mutexLocked == 0 {
throw("runtime·lock: sleeping while lock is available")
}
// Collect times for mutex profile (seen in unlock2 only via mWaitList),
// and for "/sync/mutex/wait/total:seconds" metric (to match).
if !haveTimers {
gp.m.mWaitList.startTicks = cputicks()
startTime = gp.m.mLockProfile.start()
haveTimers = true
}
// Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field
gp.m.mWaitList.next = mutexWaitListHead(v)
// Pack a (partial) pointer to this M with the current lock state bits
next := (uintptr(unsafe.Pointer(gp.m)) &^ mutexMMask) | v&mutexMMask | mutexSleeping
if weSpin { // If we were spinning, prepare to retire
next = next &^ mutexSpinning
}
if atomic.Casuintptr(&l.key, v, next) {
weSpin = false
// We've pushed ourselves onto the stack of waiters. Wait.
semasleep(-1)
atTail = gp.m.mWaitList.next == 0 // we were at risk of starving
i = 0
}
gp.m.mWaitList.next = 0
v = atomic.Loaduintptr(&l.key)
}
}
func unlock(l *mutex) {
unlockWithRank(l)
}
// We might not be holding a p in this code.
//
//go:nowritebarrier
func unlock2(l *mutex) {
gp := getg()
var prev8 uint8
var haveStackLock bool
var endTicks int64
if !mutexSampleContention() {
// Not collecting a sample for the contention profile, do the quick release
prev8 = atomic.Xchg8(key8(&l.key), 0)
} else {
// If there's contention, we'll sample it. Don't allow another
// lock2/unlock2 pair to finish before us and take our blame. Prevent
// that by trading for the stack lock with a CAS.
v := atomic.Loaduintptr(&l.key)
for {
if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
// No contention, or (stack lock unavailable) no way to calculate it
prev8 = atomic.Xchg8(key8(&l.key), 0)
endTicks = 0
break
}
// There's contention, the stack lock appeared to be available, and
// we'd like to collect a sample for the contention profile.
if endTicks == 0 {
// Read the time before releasing the lock. The profile will be
// strictly smaller than what other threads would see by timing
// their lock calls.
endTicks = cputicks()
}
next := (v | mutexStackLocked) &^ (mutexLocked | mutexSleeping)
if atomic.Casuintptr(&l.key, v, next) {
haveStackLock = true
prev8 = uint8(v)
// The fast path of lock2 may have cleared mutexSleeping.
// Restore it so we're sure to call unlock2Wake below.
prev8 |= mutexSleeping
break
}
v = atomic.Loaduintptr(&l.key)
}
}
if prev8&mutexLocked == 0 {
throw("unlock of unlocked lock")
}
if prev8&mutexSleeping != 0 {
unlock2Wake(l, haveStackLock, endTicks)
}
gp.m.mLockProfile.store()
gp.m.locks--
if gp.m.locks < 0 {
throw("runtime·unlock: lock count")
}
if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
gp.stackguard0 = stackPreempt
}
}
// mutexSampleContention returns whether the current mutex operation should
// report any contention it discovers.
func mutexSampleContention() bool {
if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 {
return false
} else {
// TODO: have SetMutexProfileFraction do the clamping
rate32 := uint32(rate)
if int64(rate32) != rate {
rate32 = ^uint32(0)
}
return cheaprandn(rate32) == 0
}
}
// unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
//
//go:nowritebarrier
func unlock2Wake(l *mutex, haveStackLock bool, endTicks int64) {
v := atomic.Loaduintptr(&l.key)
// On occasion, seek out and wake the M at the bottom of the stack so it
// doesn't starve.
antiStarve := cheaprandn(mutexTailWakePeriod) == 0
if haveStackLock {
goto useStackLock
}
if !(antiStarve || // avoiding starvation may require a wake
v&mutexSpinning == 0 || // no spinners means we must wake
mutexPreferLowLatency(l)) { // prefer waiters be awake as much as possible
return
}
for {
if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
// No waiting Ms means nothing to do.
//
// If the stack lock is unavailable, its owner would make the same
// wake decisions that we would, so there's nothing for us to do.
//
// Although: This thread may have a different call stack, which
// would result in a different entry in the mutex contention profile
// (upon completion of go.dev/issue/66999). That could lead to weird
// results if a slow critical section ends but another thread
// quickly takes the lock, finishes its own critical section,
// releases the lock, and then grabs the stack lock. That quick
// thread would then take credit (blame) for the delay that this
// slow thread caused. The alternative is to have more expensive
// atomic operations (a CAS) on the critical path of unlock2.
return
}
// Other M's are waiting for the lock.
// Obtain the stack lock, and pop off an M.
next := v | mutexStackLocked
if atomic.Casuintptr(&l.key, v, next) {
break
}
v = atomic.Loaduintptr(&l.key)
}
// We own the mutexStackLocked flag. New Ms may push themselves onto the
// stack concurrently, but we're now the only thread that can remove or
// modify the Ms that are sleeping in the list.
useStackLock:
if endTicks != 0 {
// Find the M at the bottom of the stack of waiters, which has been
// asleep for the longest. Take the average of its wait time and the
// head M's wait time for the mutex contention profile, matching the
// estimate we do in semrelease1 (for sync.Mutex contention).
//
// We don't keep track of the tail node (we don't need it often), so do
// an O(N) walk on the list of sleeping Ms to find it.
head := mutexWaitListHead(v).ptr()
for node, n := head, 0; ; {
n++
next := node.mWaitList.next.ptr()
if next == nil {
cycles := ((endTicks - head.mWaitList.startTicks) + (endTicks - node.mWaitList.startTicks)) / 2
node.mWaitList.startTicks = endTicks
head.mWaitList.startTicks = endTicks
getg().m.mLockProfile.recordUnlock(cycles * int64(n))
break
}
node = next
}
}
var committed *m // If we choose an M within the stack, we've made a promise to wake it
for {
headM := v &^ mutexMMask
flags := v & (mutexMMask &^ mutexStackLocked) // preserve low bits, but release stack lock
mp := mutexWaitListHead(v).ptr()
wakem := committed
if committed == nil {
if v&mutexSpinning == 0 || mutexPreferLowLatency(l) {
wakem = mp
}
if antiStarve {
// Wake the M at the bottom of the stack of waiters. (This is
// O(N) with the number of waiters.)
wakem = mp
prev := mp
for {
next := wakem.mWaitList.next.ptr()
if next == nil {
break
}
prev, wakem = wakem, next
}
if wakem != mp {
committed = wakem
prev.mWaitList.next = wakem.mWaitList.next
// An M sets its own startTicks when it first goes to sleep.
// When an unlock operation is sampled for the mutex
// contention profile, it takes blame for the entire list of
// waiting Ms but only updates the startTicks value at the
// tail. Copy any updates to the next-oldest M.
prev.mWaitList.startTicks = wakem.mWaitList.startTicks
}
}
}
if wakem == mp {
headM = uintptr(mp.mWaitList.next) &^ mutexMMask
}
next := headM | flags
if atomic.Casuintptr(&l.key, v, next) {
if wakem != nil {
// Claimed an M. Wake it.
semawakeup(wakem)
}
return
}
v = atomic.Loaduintptr(&l.key)
}
}
|