/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>
#include <kern/locks_internal.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/thread.h>

#include <mach/machine/sdt.h>

#include <machine/cpu_data.h>
#include <machine/machine_cpu.h>

#if !LCK_MTX_USE_ARCH

/*
 * lck_mtx_t
 * ~~~~~~~~~
 *
 * Kernel mutexes in this implementation are made of four 32-bit words:
 *
 * - word 0: turnstile compact ID (24 bits) and the 0x22 lock tag
 * - word 1: padding (to be used for group compact IDs)
 * - word 2: mutex state (lock owner + interlock, spin and waiters bits),
 *           referred to as "data" in the code.
 * - word 3: adaptive spin and interlock MCS queue tails.
 *
 * The 64-bit word made of the last two words is referred to
 * as the "mutex state" in the code.
 *
 *
 * Core serialization rules
 * ~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The mutex has a bit (lck_mtx_t::lck_mtx.ilocked, or bit LCK_MTX_ILOCK
 * of the data word) that serves as a spinlock for the mutex state.
 *
 *
 * Updates to the lock fields must follow these rules:
 *
 * - It is OK to "steal" the mutex (updating its data field) if no one
 *   holds the interlock.
 *
 * - Holding the interlock allows its holder to update the first 3 words
 *   of the kernel mutex without using RMW atomics (plain stores are OK).
 *
 * - Holding the interlock is required for a thread to remove itself
 *   from the adaptive spin queue.
 *
 * - Threads can enqueue themselves onto the adaptive spin wait queue
 *   or the interlock wait queue at any time.
 *
 *
 * Waiters bit and turnstiles
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The turnstile on a kernel mutex is set by waiters, and cleared
 * once they have all been resumed and successfully acquired the lock.
 *
 * LCK_MTX_NEEDS_WAKEUP being set (always with an owner set too)
 * forces threads into the lck_mtx_unlock slowpath,
 * in order to evaluate whether lck_mtx_unlock_wakeup() must be called.
 *
 * As a result, it really only needs to be set at select times:
 *
 * - when a thread blocks and "snitches" on the current lock owner,
 *   so that when that thread unlocks it performs the wakeup,
 *
 * - when a thread that was woken up resumes its work and becomes
 *   the inheritor.
 */
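
/*
 * Illustrative sketch only, to make the word/bit terminology above easier
 * to follow; this is NOT the authoritative lck_mtx_t definition (the real
 * layout is the lck_mtx_t declaration this file already uses), so the
 * struct below is hypothetical and deliberately never compiled.
 */
#if 0 /* layout sketch only, never compiled */
struct lck_mtx_layout_sketch {
	uint32_t tsid_and_tag;   /* word 0: turnstile compact ID + 0x22 tag */
	uint32_t padding;        /* word 1: reserved for group compact IDs */
	uint32_t data;           /* word 2: owner ctid + ILOCK/SPIN/WAKEUP bits */
	uint32_t queue_tails;    /* word 3: adaptive-spin and interlock MCS tails */
};
/* words 2+3 together form the 64-bit "mutex state" referred to in the code */
#endif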

#define ADAPTIVE_SPIN_ENABLE 0x1

#define NOINLINE __attribute__((noinline))
#define LCK_MTX_EVENT(lck) CAST_EVENT64_T(&(lck)->lck_mtx.data)
#define LCK_EVENT_TO_MUTEX(e) __container_of((uint32_t *)(e), lck_mtx_t, lck_mtx.data)
#define LCK_MTX_HAS_WAITERS(l) ((l)->lck_mtx.data & LCK_MTX_NEEDS_WAKEUP)

#if DEVELOPMENT || DEBUG
TUNABLE(bool, LckDisablePreemptCheck, "-disable_mtx_chk", false);
#endif /* DEVELOPMENT || DEBUG */

extern unsigned int not_in_kdp;

#if CONFIG_SPTM
extern const bool *sptm_xnu_triggered_panic_ptr;
#endif /* CONFIG_SPTM */

KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);

#define LCK_MTX_NULL_CTID 0x00000000u

__enum_decl(lck_mtx_mode_t, uint32_t, {
	LCK_MTX_MODE_SLEEPABLE,
	LCK_MTX_MODE_SPIN,
	LCK_MTX_MODE_SPIN_ALWAYS,
});

__enum_decl(lck_ilk_mode_t, uint32_t, {
	LCK_ILK_MODE_UNLOCK,
	LCK_ILK_MODE_DIRECT,
	LCK_ILK_MODE_FROM_AS,
});

static inline void
lck_mtx_mcs_clear(lck_mtx_mcs_t mcs)
{
	*mcs = (struct lck_mtx_mcs){ };
}

static inline lck_mcs_id_t
lck_mtx_get_mcs_id(void)
{
	return lck_mcs_id_current(LCK_MCS_SLOT_0);
}

__pure2
static inline lck_mtx_mcs_t
lck_mtx_get_mcs(lck_mcs_id_t idx)
{
	return &lck_mcs_get_other(idx)->mcs_mtx;
}


#pragma mark lck_mtx_t: validation

__abortlike
static void
__lck_mtx_invalid_panic(lck_mtx_t *lck)
{
	panic("Invalid/destroyed mutex %p: "
	    "<0x%06x 0x%02x 0x%08x 0x%08x/%p 0x%04x 0x%04x>",
	    lck, lck->lck_mtx_tsid, lck->lck_mtx_type, lck->lck_mtx_grp,
	    lck->lck_mtx.data, ctid_get_thread_unsafe(lck->lck_mtx.owner),
	    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail);
}

__abortlike
static void
__lck_mtx_not_owned_panic(lck_mtx_t *lock, thread_t thread)
{
	panic("Mutex %p is unexpectedly not owned by thread %p", lock, thread);
}

#if !LCK_MTX_USE_ARCH
__abortlike
static void
__lck_mtx_not_locked_spin(lck_mtx_t *lock, thread_t thread)
{
	panic("Mutex %p is unexpectedly not locked in spin mode by thread %p",
	    lock, thread);
}
#endif /* !LCK_MTX_USE_ARCH */

__abortlike
static void
__lck_mtx_owned_panic(lck_mtx_t *lock, thread_t thread)
{
	panic("Mutex %p is unexpectedly owned by thread %p", lock, thread);
}

__abortlike
static void
__lck_mtx_lock_is_sleepable_panic(lck_mtx_t *lck)
{
	// "Always" variants can never block. If the lock is held as a normal mutex
	// then someone is mixing always and non-always calls on the same lock, which is
	// forbidden.
	panic("Mutex %p is held as a full-mutex (spin-always lock attempted)", lck);
}

#if DEVELOPMENT || DEBUG
__abortlike
static void
__lck_mtx_preemption_disabled_panic(lck_mtx_t *lck, int expected)
{
	panic("Attempt to take mutex %p with preemption disabled (%d)",
	    lck, get_preemption_level() - expected);
}

__abortlike
static void
__lck_mtx_at_irq_panic(lck_mtx_t *lck)
{
	panic("Attempt to take mutex %p in IRQ context", lck);
}

/*
 * Routine: lck_mtx_check_preemption
 *
 * Verify preemption is enabled when attempting to acquire a mutex.
 */
static inline void
lck_mtx_check_preemption(lck_mtx_t *lock, thread_t thread, int expected)
{
#pragma unused(thread)
	if (lock_preemption_level_for_thread(thread) == expected) {
		return;
	}
	if (LckDisablePreemptCheck) {
		return;
	}
	if (current_cpu_datap()->cpu_hibernate) {
		return;
	}
	if (startup_phase < STARTUP_SUB_EARLY_BOOT) {
		return;
	}
#if CONFIG_SPTM
	/*
	 * If a panic has been initiated on SPTM devices, preemption was disabled
	 * by sleh, but platform callbacks could be acquiring mutexes.
	 */
	if (*sptm_xnu_triggered_panic_ptr) {
		return;
	}
#endif
	__lck_mtx_preemption_disabled_panic(lock, expected);
}

static inline void
lck_mtx_check_irq(lck_mtx_t *lock)
{
	if (ml_at_interrupt_context()) {
		__lck_mtx_at_irq_panic(lock);
	}
}

#define LCK_MTX_SNIFF_PREEMPTION(thread) lock_preemption_level_for_thread(thread)
#define LCK_MTX_CHECK_INVARIANTS 1
#else
#define lck_mtx_check_irq(lck) ((void)0)
#define LCK_MTX_SNIFF_PREEMPTION(thread) 0
#define LCK_MTX_CHECK_INVARIANTS 0
#endif /* !DEVELOPMENT && !DEBUG */

#if CONFIG_DTRACE
#define LCK_MTX_SNIFF_DTRACE() lck_debug_state.lds_value
#else
#define LCK_MTX_SNIFF_DTRACE() 0
#endif


#pragma mark lck_mtx_t: alloc/init/destroy/free

lck_mtx_t *
lck_mtx_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_mtx_t *lck;

	lck = zalloc(KT_LCK_MTX);
	lck_mtx_init(lck, grp, attr);
	return lck;
}

void
lck_mtx_free(lck_mtx_t *lck, lck_grp_t *grp)
{
	lck_mtx_destroy(lck, grp);
	zfree(KT_LCK_MTX, lck);
}

__mockable void
lck_mtx_init(lck_mtx_t *lck, lck_grp_t *grp, lck_attr_t *attr)
{
	if (attr == LCK_ATTR_NULL) {
		attr = &lck_attr_default;
	}

	*lck = (lck_mtx_t){
		.lck_mtx_type = LCK_TYPE_MUTEX,
		.lck_mtx_grp = grp->lck_grp_attr_id,
	};
	if (attr->lck_attr_val & LCK_ATTR_DEBUG) {
		lck->lck_mtx.data |= LCK_MTX_PROFILE;
	}

	lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
}

__mockable void
lck_mtx_destroy(lck_mtx_t *lck, lck_grp_t *grp)
{
	if (lck->lck_mtx_tsid && lck->lck_mtx_type == LCK_TYPE_MUTEX) {
		panic("Mutex to destroy still has waiters: %p: "
		    "<0x%06x 0x%02x 0x%08x 0x%08x/%p 0x%04x 0x%04x>",
		    lck, lck->lck_mtx_tsid, lck->lck_mtx_type, lck->lck_mtx_grp,
		    lck->lck_mtx.data, ctid_get_thread_unsafe(lck->lck_mtx.owner),
		    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail);
	}
	if (lck->lck_mtx_type != LCK_TYPE_MUTEX ||
	    (lck->lck_mtx.data & ~LCK_MTX_PROFILE) ||
	    lck->lck_mtx.as_tail || lck->lck_mtx.ilk_tail) {
		__lck_mtx_invalid_panic(lck);
	}
	LCK_GRP_ASSERT_ID(grp, lck->lck_mtx_grp);
	lck->lck_mtx_type = LCK_TYPE_NONE;
	lck->lck_mtx.data = LCK_MTX_TAG_DESTROYED;
	lck->lck_mtx_grp = 0;
	lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
}

#pragma mark lck_mtx_t: lck_mtx_ilk*

static hw_spin_timeout_status_t
lck_mtx_ilk_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	lck_mtx_t *lck = _lock;

	panic("Mutex interlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p, "
	    "<0x%06x 0x%02x 0x%08x 0x%08x 0x%04x 0x%04x>, "
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lck, HW_SPIN_TIMEOUT_ARG(to, st),
	    ctid_get_thread_unsafe(lck->lck_mtx.owner),
	    lck->lck_mtx_tsid, lck->lck_mtx_type,
	    lck->lck_mtx_grp, lck->lck_mtx.data,
	    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail,
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy lck_mtx_ilk_timeout_policy = {
	.hwsp_name = "lck_mtx_t (ilk)",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = lck_mtx_ilk_timeout_panic,
};

static void
lck_mtx_ilk_lock_cleanup_as_mcs(
	lck_mtx_t *lock,
	lck_mcs_id_t idx,
	lck_mtx_mcs_t mcs,
	hw_spin_timeout_t to,
	hw_spin_state_t *ss)
{
	lck_mtx_mcs_t nnode = NULL;
	lck_mcs_id_t pidx = (lck_mcs_id_t)mcs->lmm_as_prev;
	bool was_last;

	/*
	 * This is called when the thread made use
	 * of the adaptive spin queue and needs
	 * to remove itself from it.
	 */

	/*
	 * If the thread is last, set the tail to the node before us.
	 */
	was_last = lock_cmpxchg(&lock->lck_mtx.as_tail, idx, pidx, release);

	if (was_last) {
		/*
		 * If @c mcs was last, we need to erase the previous
		 * node link to it.
		 *
		 * However, new nodes could have now taken our place
		 * and set the previous node's @c lmm_as_next field
		 * already, so we must CAS rather than blindly set.
		 *
		 * We know the previous node is stable because
		 * we hold the interlock (preventing concurrent
		 * removals).
		 */
		if (pidx) {
			os_atomic_cmpxchg(&lck_mtx_get_mcs(pidx)->lmm_as_next,
			    mcs, nnode, relaxed);
		}
	} else {
		/*
		 * If @c mcs wasn't last, then wait to make sure
		 * we observe @c lmm_as_next. Once we do, we know
		 * the field is stable since we hold the interlock
		 * (preventing concurrent dequeues).
		 *
		 * We can then update it to @c mcs next node index
		 * (which is also stable for similar reasons).
		 *
		 * Lastly update the previous node @c lmm_as_next
		 * field as well to terminate the dequeue.
		 */
		while (!hw_spin_wait_until(&mcs->lmm_as_next, nnode, nnode)) {
			hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
			hw_spin_should_keep_spinning(lock, pol, to, ss);
		}

		os_atomic_store(&nnode->lmm_as_prev, pidx, relaxed);
		if (pidx) {
			os_atomic_store(&lck_mtx_get_mcs(pidx)->lmm_as_next,
			    nnode, relaxed);
		}
	}

	/*
	 * @c mcs's fields are left dangling;
	 * it is the responsibility of the caller
	 * to terminate the cleanup.
	 */
}

static NOINLINE void
lck_mtx_ilk_lock_contended(
	lck_mtx_t *lock,
	lck_mtx_state_t state,
	lck_ilk_mode_t mode)
{
	hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t ss = { };

	lck_mtx_mcs_t mcs, nnode, pnode;
	lck_mcs_id_t idx, pidx;
	lck_mtx_state_t nstate;
	unsigned long ready;
	uint64_t spin_start;

	/*
	 * Take a spot in the interlock MCS queue,
	 * and then spin until we're at the head of it.
	 */

	idx = lck_mtx_get_mcs_id();
	mcs = &lck_mcs_get_current()->mcs_mtx;
	if (mode != LCK_MTX_MODE_SPIN) {
		spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
	}

	mcs->lmm_ilk_current = lock;
	pidx = os_atomic_xchg(&lock->lck_mtx.ilk_tail, idx, release);
	if (pidx) {
		pnode = lck_mtx_get_mcs(pidx);
		os_atomic_store(&pnode->lmm_ilk_next, mcs, relaxed);

		while (!hw_spin_wait_until(&mcs->lmm_ilk_ready, ready, ready)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}
	}


	/*
	 * We're now the first in line, wait for the interlock
	 * to look ready and take it.
	 *
	 * We can't just assume the lock is ours for the taking,
	 * because the fastpath of lck_mtx_lock_spin{,_always}
	 * only looks at the mutex "data" and might steal it.
	 *
	 * Also clear the interlock MCS tail if @c mcs is last.
	 */
	do {
		while (!hw_spin_wait_until(&lock->lck_mtx.val,
		    state.val, state.ilocked == 0)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		nstate = state;
		nstate.ilocked = 1;
		if (nstate.ilk_tail == idx) {
			nstate.ilk_tail = 0;
		}
	} while (!os_atomic_cmpxchg(&lock->lck_mtx, state, nstate, acquire));


	/*
	 * We now have the interlock, let's clean up the MCS state.
	 *
	 * First, if there is a node after us, notify that it
	 * is at the head of the interlock queue.
	 *
	 * Second, perform the adaptive spin MCS cleanup if needed.
	 *
	 * Lastly, clear the MCS node.
	 */
	if (state.ilk_tail != idx) {
		while (!hw_spin_wait_until(&mcs->lmm_ilk_next, nnode, nnode)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		os_atomic_store(&nnode->lmm_ilk_ready, 1, relaxed);
	}

	if (mode == LCK_ILK_MODE_FROM_AS) {
		lck_mtx_ilk_lock_cleanup_as_mcs(lock, idx, mcs, to, &ss);
	}
	lck_mtx_mcs_clear(mcs);

	if (mode != LCK_MTX_MODE_SPIN) {
		LCK_MTX_ADAPTIVE_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
	}
}

static void
lck_mtx_ilk_lock_nopreempt(lck_mtx_t *lock, lck_ilk_mode_t mode)
{
	lck_mtx_state_t state, nstate;

	os_atomic_rmw_loop(&lock->lck_mtx.val, state.val, nstate.val, acquire, {
		if (__improbable(state.ilocked || state.ilk_tail)) {
			os_atomic_rmw_loop_give_up({
				return lck_mtx_ilk_lock_contended(lock, state, mode);
			});
		}

		nstate = state;
		nstate.ilocked = true;
	});
}

static void
lck_mtx_ilk_unlock_v(lck_mtx_t *lock, uint32_t data)
{
	os_atomic_store(&lock->lck_mtx.data, data, release);
	lock_enable_preemption();
}

static void
lck_mtx_ilk_unlock(lck_mtx_t *lock)
{
	lck_mtx_ilk_unlock_v(lock, lock->lck_mtx.data & ~LCK_MTX_ILOCK);
}
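
/*
 * Illustrative usage sketch (an assumption for clarity, not a verbatim
 * excerpt from this file): per the "Core serialization rules" above, the
 * interlock brackets plain stores to the mutex words, roughly:
 *
 *	lock_disable_preemption_for_thread(thread);
 *	lck_mtx_ilk_lock_nopreempt(lock, LCK_ILK_MODE_DIRECT);
 *	... plain (non-RMW) updates to lock->lck_mtx.data ...
 *	lck_mtx_ilk_unlock(lock);	// clears ILOCK, re-enables preemption
 *
 * This is the pattern the unlock slowpath below follows.
 */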


#pragma mark lck_mtx_t: turnstile integration

/*
 * Routine: lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns with it unlocked.
 *
 * Always aggressively promotes the owning thread, even if it is already
 * at the same or higher priority.
 * This prevents it from lowering its own priority while holding a lock.
 *
 * TODO: Come up with a more efficient way to handle same-priority promotions
 *      <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
 */
static struct turnstile *
lck_mtx_lock_wait(
	lck_mtx_t *lck,
	thread_t self,
	thread_t holder,
	struct turnstile *ts)
{
	uint64_t sleep_start = LCK_MTX_BLOCK_BEGIN();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    unslide_for_kdebug(lck), (uintptr_t)thread_tid(self), 0, 0, 0);

	if (ts == TURNSTILE_NULL) {
		ts = turnstile_prepare_compact_id((uintptr_t)lck,
		    lck->lck_mtx_tsid, TURNSTILE_KERNEL_MUTEX);
		if (lck->lck_mtx_tsid == 0) {
			lck->lck_mtx_tsid = ts->ts_compact_id;
		}
	}
	assert3u(ts->ts_compact_id, ==, lck->lck_mtx_tsid);

	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	waitq_assert_wait64(&ts->ts_waitq, LCK_MTX_EVENT(lck),
	    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	lck_mtx_ilk_unlock(lck);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	LCK_MTX_BLOCK_END(lck, lck->lck_mtx_grp, sleep_start);

	return ts;
}

static void
lck_mtx_lock_wait_done(lck_mtx_t *lck, struct turnstile *ts)
{
	if (turnstile_complete_compact_id((uintptr_t)lck, ts,
	    TURNSTILE_KERNEL_MUTEX)) {
		lck->lck_mtx_tsid = 0;
	}
}

/*
 * Routine: lck_mtx_lock_will_need_wakeup
 *
 * Returns whether the thread is the current turnstile inheritor,
 * which means it will have to call lck_mtx_unlock_wakeup()
 * on unlock.
 */
__attribute__((always_inline))
static bool
lck_mtx_lock_will_need_wakeup(lck_mtx_t *lck, thread_t self)
{
	uint32_t tsid = lck->lck_mtx_tsid;

	return tsid && turnstile_get_by_id(tsid)->ts_inheritor == self;
}

/*
 * Routine: lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 *
 * NOTE: callers should call turnstile_cleanup() after
 * dropping the interlock.
 */
static void
lck_mtx_unlock_wakeup(
	lck_mtx_t *lck,
	__kdebug_only thread_t thread)
{
	struct turnstile *ts;
	kern_return_t did_wake;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
	    unslide_for_kdebug(lck), (uintptr_t)thread_tid(thread), 0, 0, 0);

	ts = turnstile_get_by_id(lck->lck_mtx_tsid);

	/*
	 * We can skip turnstile_{prepare,cleanup} because
	 * we hold the interlock of the primitive,
	 * and enqueues/wakeups all happen under the interlock,
	 * which means the turnstile is stable.
	 */
	did_wake = waitq_wakeup64_one(&ts->ts_waitq, LCK_MTX_EVENT(lck),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	assert(did_wake == KERN_SUCCESS);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}


#pragma mark lck_mtx_t: lck_mtx_lock

static inline bool
lck_mtx_ctid_on_core(uint32_t ctid)
{
	thread_t th = ctid_get_thread_unsafe(ctid);

	return th && machine_thread_on_core_allow_invalid(th);
}

#define LCK_MTX_OWNER_FOR_TRACE(lock) \
	VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe((lock)->lck_mtx.data))

static void
lck_mtx_lock_adaptive_spin(lck_mtx_t *lock, lck_mtx_state_t state)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t ss = { };
	uint64_t deadline;

	lck_mtx_mcs_t mcs, node;
	lck_mcs_id_t idx, pidx, clear_idx;
	unsigned long prev;
	lck_mtx_state_t nstate;
	ast_t *const astp = ast_pending();

	idx = lck_mtx_get_mcs_id();
	mcs = &lck_mcs_get_current()->mcs_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
	    trace_lck, LCK_MTX_OWNER_FOR_TRACE(lock), lock->lck_mtx_tsid, 0, 0);

	deadline = ml_get_timebase() + os_atomic_load(&MutexSpin, relaxed) * processor_avail_count;

	/*
	 * Take a spot in the adaptive spin queue,
	 * and then spin until we're at the head of it.
	 *
	 * Until we're at the head, we do not need to monitor
	 * whether the current owner is on core or not:
	 *
	 * 1. the head of the queue is doing it already,
	 *
	 * 2. when the entire adaptive spin queue "gives up"
	 *    as a result of the owner going off core, we want
	 *    to avoid a thundering herd and let the AS queue
	 *    pour into the interlock queue one at a time.
	 *
	 * Do give up if the scheduler signals that something
	 * more important has shown up.
	 *
	 * Note: this function is optimized so that we do not touch
	 * our local mcs node when we're the head of the queue.
	 *
	 * In the common case of contention between only two cores,
	 * this lets us avoid touching this cacheline at all.
	 */
	pidx = os_atomic_xchg(&lock->lck_mtx.as_tail, idx, release);
	if (pidx) {
		node = lck_mtx_get_mcs(pidx);
		mcs->lmm_as_prev = pidx;
		os_atomic_store(&node->lmm_as_next, mcs, release);

		while (!hw_spin_wait_until(&mcs->lmm_as_prev, prev,
		    prev == 0 || (os_atomic_load(astp, relaxed) & AST_URGENT) || (ml_get_timebase() > deadline))) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		if (__improbable(prev)) {
			goto adaptive_spin_fail;
		}

		clear_idx = 0;
	} else {
		clear_idx = idx;
	}

	/*
	 * We're now first in line.
	 *
	 * It's our responsibility to monitor the lock's state
	 * for whether (1) the lock has become available,
	 * (2) its owner has gone off core, (3) the scheduler
	 * wants its CPU back, or (4) we've spun for too long.
	 */
	deadline = ml_get_timebase() + os_atomic_load(&MutexSpin, relaxed);

	for (;;) {
		state.val = lock_load_exclusive(&lock->lck_mtx.val, acquire);

		if (__probable(!state.ilocked && !state.ilk_tail && !state.owner)) {
			/*
			 * 2-core contention: if we can, try to dequeue
			 * ourselves from the adaptive spin queue
			 * as part of this CAS in order to avoid
			 * the cost of lck_mtx_ilk_lock_cleanup_as_mcs()
			 * and zeroing the mcs node at all.
			 *
			 * Because the queue is designed to limit contention,
			 * using store-exclusive over an armv8.1 LSE atomic
			 * is actually marginally better (presumably due to
			 * the better codegen).
			 */
			nstate = state;
			nstate.ilocked = true;
			if (state.as_tail == clear_idx) {
				nstate.as_tail = 0;
			}
			if (__probable(lock_store_exclusive(&lock->lck_mtx.val,
			    state.val, nstate.val, acquire))) {
				break;
			}
		} else {
			lock_wait_for_event();
		}

		if (__improbable(ml_get_timebase() > deadline ||
		    (os_atomic_load(astp, relaxed) & AST_URGENT) ||
		    (!state.ilocked && !state.ilk_tail && state.owner &&
		    !lck_mtx_ctid_on_core(state.owner)))) {
			goto adaptive_spin_fail;
		}
	}

	/*
	 * If we're here, we got the lock; we just have to clean up
	 * the MCS nodes and return.
	 */
	if (state.as_tail != clear_idx) {
		lck_mtx_ilk_lock_cleanup_as_mcs(lock, idx, mcs, to, &ss);
		lck_mtx_mcs_clear(mcs);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(current_thread()),
	    lock->lck_mtx_tsid, 0, 0);
	return;

adaptive_spin_fail:
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, LCK_MTX_OWNER_FOR_TRACE(lock), lock->lck_mtx_tsid, 0, 0);
	return lck_mtx_ilk_lock_contended(lock, state, LCK_ILK_MODE_FROM_AS);
}

static NOINLINE void
lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, lck_mtx_mode_t mode)
{
	struct turnstile *ts = TURNSTILE_NULL;
	lck_mtx_state_t state;
	uint32_t ctid = thread->ctid;
	uint32_t data;
#if CONFIG_DTRACE
	int first_miss = 0;
#endif /* CONFIG_DTRACE */
	bool direct_wait = false;
	uint64_t spin_start;
	uint32_t profile;

	lck_mtx_check_irq(lock);
	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
	}

	for (;;) {
		/*
		 * Load the current state and perform sanity checks.
		 *
		 * Note that the various "corrupt" values are designed
		 * so that the slowpath is taken when a mutex was used
		 * after destruction, so that we do not have to do
		 * sanity checks in the fast path.
		 */
		state = os_atomic_load(&lock->lck_mtx, relaxed);
		if (state.owner == ctid) {
			__lck_mtx_owned_panic(lock, thread);
		}
		if (lock->lck_mtx_type != LCK_TYPE_MUTEX ||
		    state.data == LCK_MTX_TAG_DESTROYED) {
			__lck_mtx_invalid_panic(lock);
		}
		profile = (state.data & LCK_MTX_PROFILE);

		/*
		 * Attempt steal
		 *
		 * When the lock state is 0, then no thread can be queued
		 * for adaptive spinning or for the interlock yet.
		 *
		 * As such, we can attempt to take the interlock.
		 * (We can't take the mutex directly because we need
		 * the interlock to do turnstile operations on the way out.)
		 */
		if ((state.val & ~(uint64_t)LCK_MTX_PROFILE) == 0) {
			if (!os_atomic_cmpxchgv(&lock->lck_mtx.val,
			    state.val, state.val | LCK_MTX_ILOCK,
			    &state.val, acquire)) {
				continue;
			}
			break;
		}

#if CONFIG_DTRACE
		if (profile) {
			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
		}
#endif /* CONFIG_DTRACE */

		if (mode == LCK_MTX_MODE_SLEEPABLE) {
			spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
		} else {
			spin_start = LCK_MTX_SPIN_SPIN_BEGIN();
		}

		/*
		 * Adaptive spin or interlock
		 *
		 * Evaluate whether adaptive spinning should be attempted,
		 * and if so go for the adaptive spin.
		 *
		 * Otherwise (and this includes always-spin mutexes),
		 * go for the interlock.
		 */
		if (mode != LCK_MTX_MODE_SPIN_ALWAYS &&
		    (state.ilocked || state.as_tail || !state.owner ||
		    lck_mtx_ctid_on_core(state.owner))) {
			lck_mtx_lock_adaptive_spin(lock, state);
		} else {
			direct_wait = true;
			lck_mtx_ilk_lock_nopreempt(lock, LCK_ILK_MODE_DIRECT);
		}

		if (mode == LCK_MTX_MODE_SLEEPABLE) {
			LCK_MTX_ADAPTIVE_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
		} else {
			LCK_MTX_SPIN_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
		}

		/*
		 * Take or sleep
		 *
		 * We now have the interlock. Either the owner
		 * isn't set, and the mutex is ours to claim,
		 * or we must go to sleep.
		 *
		 * If we go to sleep, we need to set LCK_MTX_NEEDS_WAKEUP
		 * to force the current lock owner to call
		 * lck_mtx_unlock_wakeup().
		 */
		state = os_atomic_load(&lock->lck_mtx, relaxed);
		if (state.owner == LCK_MTX_NULL_CTID) {
			break;
		}

		if (mode == LCK_MTX_MODE_SPIN_ALWAYS) {
			__lck_mtx_lock_is_sleepable_panic(lock);
		}

#if CONFIG_DTRACE
		if (profile) {
			LCK_MTX_PROF_WAIT(lock, lock->lck_mtx_grp,
			    direct_wait, &first_miss);
		}
#endif /* CONFIG_DTRACE */
		os_atomic_store(&lock->lck_mtx.data,
		    state.data | LCK_MTX_ILOCK | LCK_MTX_NEEDS_WAKEUP,
		    compiler_acq_rel);
		ts = lck_mtx_lock_wait(lock, thread,
		    ctid_get_thread(state.owner), ts);

		/* returns with the interlock unlocked and preemption re-enabled */
		lock_disable_preemption_for_thread(thread);
	}

	/*
	 * We can take the lock!
	 *
	 * We hold only the interlock, and the owner field is 0.
	 *
	 * Perform various turnstile cleanups if needed,
	 * claim the lock, and re-enable preemption (if needed).
	 */
	if (ts) {
		lck_mtx_lock_wait_done(lock, ts);
	}
	data = ctid | profile;
	if (lck_mtx_lock_will_need_wakeup(lock, thread)) {
		data |= LCK_MTX_NEEDS_WAKEUP;
	}
	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		data |= LCK_MTX_ILOCK | LCK_MTX_SPIN_MODE;
	}
	os_atomic_store(&lock->lck_mtx.data, data, release);

	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		lock_enable_preemption();
	}

	assert(thread->turnstile != NULL);

	if (ts) {
		turnstile_cleanup();
	}
	LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp,
	    mode != LCK_MTX_MODE_SLEEPABLE, profile);
}

#if LCK_MTX_CHECK_INVARIANTS || CONFIG_DTRACE
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static void
lck_mtx_lock_slow(
	lck_mtx_t *lock,
	thread_t thread,
	lck_mtx_state_t state,
	lck_mtx_mode_t mode)
{
#pragma unused(state)
#if CONFIG_DTRACE
	lck_mtx_state_t ostate = {
		.data = LCK_MTX_PROFILE,
	};
#endif /* CONFIG_DTRACE */

#if LCK_MTX_CHECK_INVARIANTS
	if (mode != LCK_MTX_MODE_SPIN_ALWAYS) {
		lck_mtx_check_preemption(lock, thread,
		    (mode == LCK_MTX_MODE_SPIN));
	}
#endif /* LCK_MTX_CHECK_INVARIANTS */
#if CONFIG_DTRACE
	if (state.val == ostate.val) {
		state.data = thread->ctid | LCK_MTX_PROFILE;
		if (mode != LCK_MTX_MODE_SLEEPABLE) {
			state.ilocked = true;
			state.spin_mode = true;
		}
		os_atomic_cmpxchgv(&lock->lck_mtx.val,
		    ostate.val, state.val, &state.val, acquire);
	}
	if ((state.val & ~ostate.val) == 0) {
		LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp,
		    mode != LCK_MTX_MODE_SLEEPABLE,
		    state.data & LCK_MTX_PROFILE);
		return;
	}
#endif /* CONFIG_DTRACE */
	lck_mtx_lock_contended(lock, thread, mode);
}

static __attribute__((always_inline)) void
lck_mtx_lock_fastpath(lck_mtx_t *lock, lck_mtx_mode_t mode)
{
	thread_t thread = current_thread();
	lck_mtx_state_t state = {
		.data = thread->ctid,
	};
	uint64_t take_slowpath = 0;

	if (mode != LCK_MTX_MODE_SPIN_ALWAYS) {
		take_slowpath |= LCK_MTX_SNIFF_PREEMPTION(thread);
	}
	take_slowpath |= LCK_MTX_SNIFF_DTRACE();

	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
		state.ilocked = true;
		state.spin_mode = true;
	}

	/*
	 * Do the CAS on the entire mutex state,
	 * which hence requires the ILK/AS queues
	 * to be empty (which is fairer).
	 */
	lock_cmpxchgv(&lock->lck_mtx.val,
	    0, state.val, &state.val, acquire);

	take_slowpath |= state.val;
	if (__improbable(take_slowpath)) {
		return lck_mtx_lock_slow(lock, thread, state, mode);
	}
}

__mockable void
lck_mtx_lock(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SLEEPABLE);
}

void
lck_mtx_lock_spin(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SPIN);
}

void
lck_mtx_lock_spin_always(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SPIN_ALWAYS);
}


#pragma mark lck_mtx_t: lck_mtx_try_lock

static __attribute__((always_inline)) bool
lck_mtx_try_lock_slow_inline(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata,
	bool spin)
{
#pragma unused(lock, thread, odata, ndata)
#if CONFIG_DTRACE
	if (odata == LCK_MTX_PROFILE) {
		os_atomic_cmpxchgv(&lock->lck_mtx.data,
		    odata, ndata | LCK_MTX_PROFILE, &odata, acquire);
	}
	if ((odata & ~LCK_MTX_PROFILE) == 0) {
		LCK_MTX_TRY_ACQUIRED(lock, lock->lck_mtx_grp,
		    spin, odata & LCK_MTX_PROFILE);
		return true;
	}
	if (odata & LCK_MTX_PROFILE) {
		LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &(int){ 0 });
	}
#endif /* CONFIG_DTRACE */

	if (spin) {
		lock_enable_preemption();
	}
	return false;
}

#if CONFIG_DTRACE || LCK_MTX_CHECK_INVARIANTS
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static bool
lck_mtx_try_lock_slow(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata)
{
	return lck_mtx_try_lock_slow_inline(lock, thread, odata, ndata, false);
}

#if CONFIG_DTRACE || LCK_MTX_CHECK_INVARIANTS
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static bool
lck_mtx_try_lock_slow_spin(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata)
{
	return lck_mtx_try_lock_slow_inline(lock, thread, odata, ndata, true);
}

static __attribute__((always_inline)) bool
lck_mtx_try_lock_fastpath(lck_mtx_t *lock, lck_mtx_mode_t mode)
{
	thread_t thread = current_thread();
	uint32_t odata, ndata = thread->ctid;
	uint32_t take_slowpath = 0;

#if CONFIG_DTRACE
	take_slowpath |= lck_debug_state.lds_value;
#endif
	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
		ndata |= LCK_MTX_SPIN_MODE | LCK_MTX_ILOCK;
	}

	/*
	 * Because try_lock is likely to be used for cases
	 * like lock-inversion resolution, it tries a bit harder
	 * than lck_mtx_lock() to take the lock, and ignores the
	 * adaptive spin / interlock queues by doing the CAS
	 * on the 32-bit mutex data only.
	 */
	lock_cmpxchgv(&lock->lck_mtx.data, 0, ndata, &odata, acquire);

	take_slowpath |= odata;
	if (__probable(!take_slowpath)) {
		return true;
	}

	if (mode == LCK_MTX_MODE_SPIN_ALWAYS &&
	    (odata & LCK_MTX_CTID_MASK) &&
	    !(odata & LCK_MTX_SPIN_MODE)) {
		__lck_mtx_lock_is_sleepable_panic(lock);
	}

	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		return lck_mtx_try_lock_slow(lock, thread, odata, ndata);
	} else {
		return lck_mtx_try_lock_slow_spin(lock, thread, odata, ndata);
	}
}

boolean_t
lck_mtx_try_lock(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SLEEPABLE);
}

boolean_t
lck_mtx_try_lock_spin(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SPIN);
}

boolean_t
lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SPIN_ALWAYS);
}
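
/*
 * Illustrative use of try_lock for lock-inversion resolution (a sketch
 * with hypothetical locks A and B; the established ordering is A before B,
 * and the caller currently holds B):
 *
 *	if (!lck_mtx_try_lock(&A)) {
 *		lck_mtx_unlock(&B);
 *		lck_mtx_lock(&A);
 *		lck_mtx_lock(&B);
 *		// re-validate any state protected by B here
 *	}
 */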


#pragma mark lck_mtx_t: lck_mtx_unlock

static NOINLINE void
lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, uint32_t data)
{
	bool cleanup = false;

#if !CONFIG_DTRACE
	/*
	 * This check is done by lck_mtx_unlock_slow() when it is enabled.
	 */
	if (thread->ctid != (data & LCK_MTX_CTID_MASK)) {
		__lck_mtx_not_owned_panic(lock, thread);
	}
#endif /* !CONFIG_DTRACE */

	if ((data & LCK_MTX_SPIN_MODE) == 0) {
		lock_disable_preemption_for_thread(thread);
		lck_mtx_ilk_lock_nopreempt(lock, LCK_ILK_MODE_UNLOCK);
	}

	/*
	 * We must re-load the data: we might have taken
	 * the slowpath because another thread had taken
	 * the interlock and set the NEEDS_WAKEUP bit
	 * while we were spinning to get it.
	 */
	data = os_atomic_load(&lock->lck_mtx.data, compiler_acq_rel);
	if (data & LCK_MTX_NEEDS_WAKEUP) {
		lck_mtx_unlock_wakeup(lock, thread);
		cleanup = true;
	}
	lck_mtx_ilk_unlock_v(lock, data & LCK_MTX_PROFILE);

	LCK_MTX_RELEASED(lock, lock->lck_mtx_grp, data & LCK_MTX_PROFILE);

	/*
	 * Do not do any turnstile operations outside of this block.
	 *
	 * lock/unlock is called at an early stage of boot while single
	 * threaded, before turnstiles are available.
	 * Even without contention we can come through the slow path
	 * if the mutex is acquired as a spin lock.
	 */
	if (cleanup) {
		turnstile_cleanup();
	}
}

#if CONFIG_DTRACE
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static void
lck_mtx_unlock_slow(lck_mtx_t *lock, thread_t thread, uint32_t data)
{
#if CONFIG_DTRACE
	/*
	 * If DTrace is enabled, locks can be profiled,
	 * which causes the fastpath of unlock to fail.
	 */
	if ((data & LCK_MTX_BITS_MASK) == LCK_MTX_PROFILE) {
		os_atomic_cmpxchgv(&lock->lck_mtx.data, data, LCK_MTX_PROFILE,
		    &data, release);
	}
	if (thread->ctid != (data & LCK_MTX_CTID_MASK)) {
		__lck_mtx_not_owned_panic(lock, thread);
	}
	if ((data & (LCK_MTX_BITS_MASK & ~LCK_MTX_PROFILE)) == 0) {
		LCK_MTX_RELEASED(lock, lock->lck_mtx_grp, false);
		return;
	}
#endif /* CONFIG_DTRACE */

	lck_mtx_unlock_contended(lock, thread, data);
}

__mockable void
lck_mtx_unlock(lck_mtx_t *lock)
{
	thread_t thread = current_thread();
	uint32_t take_slowpath = 0;
	uint32_t data;

	take_slowpath |= LCK_MTX_SNIFF_DTRACE();

	/*
	 * The fast path ignores the ILK/AS queues on purpose:
	 * those really are a "lock" concept, not an unlock one.
	 */
	if (__probable(lock_cmpxchgv(&lock->lck_mtx.data,
	    thread->ctid, 0, &data, release))) {
		if (__probable(!take_slowpath)) {
			return;
		}
	}

	lck_mtx_unlock_slow(lock, thread, data);
}


#pragma mark lck_mtx_t: misc

void
lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
{
	lck_mtx_state_t state = os_atomic_load(&lock->lck_mtx, relaxed);
	thread_t thread = current_thread();

	if (type == LCK_MTX_ASSERT_OWNED) {
		if (state.owner != thread->ctid) {
			__lck_mtx_not_owned_panic(lock, thread);
		}
	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
		if (state.owner == thread->ctid) {
			__lck_mtx_owned_panic(lock, thread);
		}
	} else {
		panic("lck_mtx_assert(): invalid arg (%u)", type);
	}
}

#if !LCK_MTX_USE_ARCH
void
lck_mtx_assert_owned_spin(lck_mtx_t *lock)
{
	lck_mtx_state_t state = os_atomic_load(&lock->lck_mtx, relaxed);
	thread_t thread = current_thread();

	if (state.owner != thread->ctid) {
		__lck_mtx_not_owned_panic(lock, thread);
	}

	if (!state.spin_mode) {
		__lck_mtx_not_locked_spin(lock, thread);
	}
}
#endif /* !LCK_MTX_USE_ARCH */

/*
 * Routine: lck_mtx_convert_spin
 *
 * Convert a mutex held for spin into a held full mutex
 */
void
lck_mtx_convert_spin(lck_mtx_t *lock)
{
	lck_mtx_state_t state = os_atomic_load(&lock->lck_mtx, relaxed);
	thread_t thread = current_thread();
	uint32_t data = thread->ctid;

	if (state.owner != data) {
		__lck_mtx_not_owned_panic(lock, thread);
	}

	if (state.spin_mode) {
		/*
		 * Note: we can acquire the lock in spin mode
		 * _and_ be the inheritor if we waited.
		 *
		 * We must only clear ilocked and spin_mode,
		 * but preserve owner and needs_wakeup.
		 */
		state.ilocked = false;
		state.spin_mode = false;
		lck_mtx_ilk_unlock_v(lock, state.data);
		turnstile_cleanup();
	}
}
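
/*
 * Usage sketch (illustrative only; `obj` and its lock are hypothetical):
 * a caller may take the lock in spin mode, then promote it to a full
 * mutex with lck_mtx_convert_spin() before doing anything that may block:
 *
 *	lck_mtx_lock_spin(&obj->lock);
 *	... short, non-blocking critical section ...
 *	lck_mtx_convert_spin(&obj->lock);
 *	... code that may block ...
 *	lck_mtx_unlock(&obj->lock);
 */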

/*
 * Routine: kdp_lck_mtx_lock_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
	lck_mtx_state_t state = os_atomic_load(&lck->lck_mtx, relaxed);

	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	if (state.data == LCK_MTX_TAG_DESTROYED) {
		return false;
	}
	return state.owner || state.ilocked;
}

void
kdp_lck_mtx_find_owner(
	struct waitq *waitq __unused,
	event64_t event,
	thread_waitinfo_t *waitinfo)
{
	lck_mtx_t *mutex = LCK_EVENT_TO_MUTEX(event);
	lck_mtx_state_t state = os_atomic_load(&mutex->lck_mtx, relaxed);

	assert3u(state.data, !=, LCK_MTX_TAG_DESTROYED);
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
	waitinfo->owner = thread_tid(ctid_get_thread(state.owner));
}

#endif /* !LCK_MTX_USE_ARCH */

/*
 * Routine: mutex_pause
 *
 * Called by former callers of simple_lock_pause().
 */
#define MAX_COLLISION_COUNTS 32
#define MAX_COLLISION 8

unsigned int max_collision_count[MAX_COLLISION_COUNTS];

uint32_t collision_backoffs[MAX_COLLISION] = {
	10, 50, 100, 200, 400, 600, 800, 1000
};


void
mutex_pause(uint32_t collisions)
{
	wait_result_t wait_result;
	uint32_t back_off;

	if (collisions >= MAX_COLLISION_COUNTS) {
		collisions = MAX_COLLISION_COUNTS - 1;
	}
	max_collision_count[collisions]++;

	if (collisions >= MAX_COLLISION) {
		collisions = MAX_COLLISION - 1;
	}
	back_off = collision_backoffs[collisions];

	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
	assert(wait_result == THREAD_WAITING);

	wait_result = thread_block(THREAD_CONTINUE_NULL);
	assert(wait_result == THREAD_TIMED_OUT);
}


unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;

boolean_t
lck_mtx_yield(
	lck_mtx_t *lck)
{
	bool has_waiters = LCK_MTX_HAS_WAITERS(lck);

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	if (!has_waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0);
		lck_mtx_lock(lck);
	}
	return has_waiters;
}