1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
64 #define LOCK_PRIVATE 1
65
66 #include <mach_ldebug.h>
67
68 #include <kern/lock_stat.h>
69 #include <kern/locks.h>
70 #include <kern/zalloc.h>
71 #include <kern/misc_protos.h>
72 #include <kern/thread.h>
73 #include <kern/processor.h>
74 #include <kern/cpu_data.h>
75 #include <kern/cpu_number.h>
76 #include <kern/sched_prim.h>
77 #include <kern/debug.h>
78 #include <string.h>
79
80 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
81 #include <machine/atomic.h>
82 #include <machine/machine_cpu.h>
83 #include <i386/mp.h>
84 #include <machine/atomic.h>
85 #include <sys/kdebug.h>
86 #include <i386/locks_i386_inlines.h>
87 #include <kern/cpu_number.h>
88 #include <os/hash.h>
89
90 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
91
92 uint64_t _Atomic lock_panic_timeout = 0xf000000; /* 251e6 TSC ticks */
93
94 /* Forwards */
95
96 #if USLOCK_DEBUG
97 /*
98 * Perform simple lock checks.
99 */
100 int uslock_check = 1;
101 int max_lock_loops = 100000000;
102 decl_simple_lock_data(extern, printf_lock);
103 decl_simple_lock_data(extern, panic_lock);
104 #endif /* USLOCK_DEBUG */
105
106 extern unsigned int not_in_kdp;
107
108 #if !LOCK_STATS
109 #define usimple_lock_nopreempt(lck, grp) \
110 usimple_lock_nopreempt(lck)
111 #define usimple_lock_try_nopreempt(lck, grp) \
112 usimple_lock_try_nopreempt(lck)
113 #endif
114 static void usimple_lock_nopreempt(usimple_lock_t, lck_grp_t *);
115 static unsigned int usimple_lock_try_nopreempt(usimple_lock_t, lck_grp_t *);
116
117 /*
118 * We often want to know the addresses of the callers
119 * of the various lock routines. However, this information
120 * is only used for debugging and statistics.
121 */
122 typedef void *pc_t;
123 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
124 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
125 #if ANY_LOCK_DEBUG
126 #define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
127 #define DECL_PC(pc) pc_t pc;
128 #else /* ANY_LOCK_DEBUG */
129 #define DECL_PC(pc)
130 #ifdef lint
131 /*
132 * Eliminate lint complaints about unused local pc variables.
133 */
134 #define OBTAIN_PC(pc) ++pc
135 #else /* lint */
136 #define OBTAIN_PC(pc)
137 #endif /* lint */
138 #endif /* ANY_LOCK_DEBUG */
139
140 KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);
141
142 KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);
143
144 KALLOC_TYPE_DEFINE(KT_LCK_MTX_EXT, lck_mtx_ext_t, KT_PRIV_ACCT);
145
146 /*
147 * atomic exchange API is a low level abstraction of the operations
148 * to atomically read, modify, and write a pointer. This abstraction works
149 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
150 * well as the ARM exclusive instructions.
151 *
152 * atomic_exchange_begin() - begin exchange and retrieve current value
153 * atomic_exchange_complete() - conclude an exchange
154 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
155 */
156 uint32_t
157 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
158 {
159 uint32_t val;
160
161 (void)ord; // Memory order not used
162 val = os_atomic_load(target, relaxed);
163 *previous = val;
164 return val;
165 }
166
167 boolean_t
168 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
169 {
170 return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
171 }
172
173 void
174 atomic_exchange_abort(void)
175 {
176 }
177
178 boolean_t
179 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
180 {
181 uint32_t value, prev;
182
183 for (;;) {
184 value = atomic_exchange_begin32(target, &prev, ord);
185 if (value & test_mask) {
186 if (wait) {
187 cpu_pause();
188 } else {
189 atomic_exchange_abort();
190 }
191 return FALSE;
192 }
193 value |= set_mask;
194 if (atomic_exchange_complete32(target, prev, value, ord)) {
195 return TRUE;
196 }
197 }
198 }
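/*
 * Illustrative sketch (editor's addition, not part of the original code):
 * atomic_test_and_set32() above is the canonical caller of this API. A
 * hypothetical caller that sets a bit only when it is currently clear would
 * follow the same begin/complete/abort pattern:
 *
 *	uint32_t prev, val;
 *	for (;;) {
 *		val = atomic_exchange_begin32(&word, &prev, memory_order_acquire);
 *		if (val & SOME_BIT) {			// hypothetical flag
 *			atomic_exchange_abort();	// decided not to write
 *			break;
 *		}
 *		if (atomic_exchange_complete32(&word, prev, val | SOME_BIT,
 *		    memory_order_acquire)) {
 *			break;				// exchange succeeded
 *		}
 *		cpu_pause();				// lost the race, retry
 *	}
 */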
199
200 /*
201 * Portable lock package implementation of usimple_locks.
202 */
203
204 #if USLOCK_DEBUG
205 #define USLDBG(stmt) stmt
206 void usld_lock_init(usimple_lock_t, unsigned short);
207 void usld_lock_pre(usimple_lock_t, pc_t);
208 void usld_lock_post(usimple_lock_t, pc_t);
209 void usld_unlock(usimple_lock_t, pc_t);
210 void usld_lock_try_pre(usimple_lock_t, pc_t);
211 void usld_lock_try_post(usimple_lock_t, pc_t);
212 int usld_lock_common_checks(usimple_lock_t, char *);
213 #else /* USLOCK_DEBUG */
214 #define USLDBG(stmt)
215 #endif /* USLOCK_DEBUG */
216
217 /*
218 * Forward definitions
219 */
220
221 static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state, boolean_t indirect);
222 static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
223 static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
224 static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
225 static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
226 static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
227
228
229 /*
230 * Routine: lck_spin_alloc_init
231 */
232 lck_spin_t *
233 lck_spin_alloc_init(
234 lck_grp_t *grp,
235 lck_attr_t *attr)
236 {
237 lck_spin_t *lck;
238
239 lck = zalloc(KT_LCK_SPIN);
240 lck_spin_init(lck, grp, attr);
241 return lck;
242 }
243
244 /*
245 * Routine: lck_spin_free
246 */
247 void
248 lck_spin_free(
249 lck_spin_t *lck,
250 lck_grp_t *grp)
251 {
252 lck_spin_destroy(lck, grp);
253 zfree(KT_LCK_SPIN, lck);
254 }
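/*
 * Illustrative sketch (editor's addition): the typical lifecycle of a
 * dynamically allocated spin lock using the routines in this file. The lock
 * group "my_grp" is a hypothetical name.
 *
 *	lck_spin_t *sl = lck_spin_alloc_init(my_grp, LCK_ATTR_NULL);
 *	lck_spin_lock(sl);
 *	... short critical section, preemption disabled ...
 *	lck_spin_unlock(sl);
 *	lck_spin_free(sl, my_grp);
 */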
255
256 /*
257 * Routine: lck_spin_init
258 */
259 void
260 lck_spin_init(
261 lck_spin_t *lck,
262 lck_grp_t *grp,
263 __unused lck_attr_t *attr)
264 {
265 usimple_lock_init((usimple_lock_t) lck, 0);
266 if (grp) {
267 lck_grp_reference(grp, &grp->lck_grp_spincnt);
268 }
269 }
270
271 /*
272 * Routine: lck_spin_destroy
273 */
274 void
275 lck_spin_destroy(
276 lck_spin_t *lck,
277 lck_grp_t *grp)
278 {
279 if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
280 return;
281 }
282 lck->interlock = LCK_SPIN_TAG_DESTROYED;
283 if (grp) {
284 lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
285 }
286 return;
287 }
288
289 /*
290 * Routine: lck_spin_lock
291 */
292 void
293 lck_spin_lock_grp(
294 lck_spin_t *lck,
295 lck_grp_t *grp)
296 {
297 #pragma unused(grp)
298 usimple_lock((usimple_lock_t) lck, grp);
299 }
300
301 void
302 lck_spin_lock(
303 lck_spin_t *lck)
304 {
305 usimple_lock((usimple_lock_t) lck, NULL);
306 }
307
308 void
309 lck_spin_lock_nopreempt(
310 lck_spin_t *lck)
311 {
312 usimple_lock_nopreempt((usimple_lock_t) lck, NULL);
313 }
314
315 void
316 lck_spin_lock_nopreempt_grp(
317 lck_spin_t *lck,
318 lck_grp_t *grp)
319 {
320 #pragma unused(grp)
321 usimple_lock_nopreempt((usimple_lock_t) lck, grp);
322 }
323
324 /*
325 * Routine: lck_spin_unlock
326 */
327 void
328 lck_spin_unlock(
329 lck_spin_t *lck)
330 {
331 usimple_unlock((usimple_lock_t) lck);
332 }
333
334 void
335 lck_spin_unlock_nopreempt(
336 lck_spin_t *lck)
337 {
338 usimple_unlock_nopreempt((usimple_lock_t) lck);
339 }
340
341 boolean_t
342 lck_spin_try_lock_grp(
343 lck_spin_t *lck,
344 lck_grp_t *grp)
345 {
346 #pragma unused(grp)
347 boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
348 #if DEVELOPMENT || DEBUG
349 if (lrval) {
350 pltrace(FALSE);
351 }
352 #endif
353 return lrval;
354 }
355
356
357 /*
358 * Routine: lck_spin_try_lock
359 */
360 boolean_t
361 lck_spin_try_lock(
362 lck_spin_t *lck)
363 {
364 boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
365 #if DEVELOPMENT || DEBUG
366 if (lrval) {
367 pltrace(FALSE);
368 }
369 #endif
370 return lrval;
371 }
372
373 int
374 lck_spin_try_lock_nopreempt(
375 lck_spin_t *lck)
376 {
377 boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, LCK_GRP_NULL);
378 #if DEVELOPMENT || DEBUG
379 if (lrval) {
380 pltrace(FALSE);
381 }
382 #endif
383 return lrval;
384 }
385
386 int
387 lck_spin_try_lock_nopreempt_grp(
388 lck_spin_t *lck,
389 lck_grp_t *grp)
390 {
391 #pragma unused(grp)
392 boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, grp);
393 #if DEVELOPMENT || DEBUG
394 if (lrval) {
395 pltrace(FALSE);
396 }
397 #endif
398 return lrval;
399 }
400
401 /*
402 * Routine: lck_spin_assert
403 */
404 void
405 lck_spin_assert(lck_spin_t *lock, unsigned int type)
406 {
407 thread_t thread, holder;
408 uintptr_t state;
409
410 if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
411 panic("lck_spin_assert(): invalid arg (%u)", type);
412 }
413
414 state = lock->interlock;
415 holder = (thread_t)state;
416 thread = current_thread();
417 if (type == LCK_ASSERT_OWNED) {
418 if (__improbable(holder == THREAD_NULL)) {
419 panic("Lock not owned %p = %lx", lock, state);
420 }
421 if (__improbable(holder != thread)) {
422 panic("Lock not owned by current thread %p = %lx", lock, state);
423 }
424 } else if (type == LCK_ASSERT_NOTOWNED) {
425 if (__improbable(holder != THREAD_NULL)) {
426 if (holder == thread) {
427 panic("Lock owned by current thread %p = %lx", lock, state);
428 }
429 }
430 }
431 }
432
433 /*
434 * Routine: kdp_lck_spin_is_acquired
435 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
436 * Returns: TRUE if lock is acquired.
437 */
438 boolean_t
439 kdp_lck_spin_is_acquired(lck_spin_t *lck)
440 {
441 if (not_in_kdp) {
442 panic("panic: spinlock acquired check done outside of kernel debugger");
443 }
444 return (lck->interlock != 0)? TRUE : FALSE;
445 }
446
447 /*
448 * Initialize a usimple_lock.
449 *
450 * No change in preemption state.
451 */
452 void
453 usimple_lock_init(
454 usimple_lock_t l,
455 __unused unsigned short tag)
456 {
457 USLDBG(usld_lock_init(l, tag));
458 hw_lock_init(&l->interlock);
459 }
460
461 static hw_lock_timeout_status_t
462 usimple_lock_acquire_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
463 {
464 #pragma unused(interrupt_time)
465
466 usimple_lock_t l = _lock;
467 uintptr_t lowner;
468 lck_spinlock_to_info_t lsti;
469
470 if (machine_timeout_suspended()) {
471 return HW_LOCK_TIMEOUT_CONTINUE;
472 }
473
474 lowner = (uintptr_t)l->interlock.lock_data;
475 lsti = lck_spinlock_timeout_hit(l, lowner);
476
477 panic("Spinlock acquisition timed out: lock=%p, "
478 "lock owner thread=0x%lx, current_thread: %p, "
479 "lock owner active on CPU %d, current owner: 0x%lx, "
480 #if INTERRUPT_MASKED_DEBUG
481 "interrupt time: %llu, "
482 #endif /* INTERRUPT_MASKED_DEBUG */
483 "spin time: %llu, start time: %llu, now: %llu, timeout: %llu",
484 l, lowner, current_thread(), lsti->owner_cpu,
485 (uintptr_t)l->interlock.lock_data,
486 #if INTERRUPT_MASKED_DEBUG
487 interrupt_time,
488 #endif /* INTERRUPT_MASKED_DEBUG */
489 now - start, start, now, timeout);
490 }
491
492 /*
493 * Acquire a usimple_lock.
494 *
495 * Returns with preemption disabled. Note
496 * that the hw_lock routines are responsible for
497 * maintaining preemption state.
498 */
499 void
500 (usimple_lock)(
501 usimple_lock_t l
502 LCK_GRP_ARG(lck_grp_t *grp))
503 {
504 DECL_PC(pc);
505
506 OBTAIN_PC(pc);
507 USLDBG(usld_lock_pre(l, pc));
508
509 (void)hw_lock_to(&l->interlock, LockTimeOutTSC,
510 usimple_lock_acquire_timeout_panic, grp);
511 #if DEVELOPMENT || DEBUG
512 pltrace(FALSE);
513 #endif
514
515 USLDBG(usld_lock_post(l, pc));
516 #if CONFIG_DTRACE
517 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
518 #endif
519 }
520
521 /*
522 * Acquire a usimple_lock_nopreempt
523 *
524 * Called and returns with preemption disabled. Note
525 * that the hw_lock routines are responsible for
526 * maintaining preemption state.
527 */
528 static void
529 usimple_lock_nopreempt(
530 usimple_lock_t l,
531 lck_grp_t *grp)
532 {
533 DECL_PC(pc);
534
535 OBTAIN_PC(pc);
536 USLDBG(usld_lock_pre(l, pc));
537
538 (void)hw_lock_to_nopreempt(&l->interlock, LockTimeOutTSC,
539 usimple_lock_acquire_timeout_panic, grp);
540
541 #if DEVELOPMENT || DEBUG
542 pltrace(FALSE);
543 #endif
544
545 USLDBG(usld_lock_post(l, pc));
546 #if CONFIG_DTRACE
547 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
548 #endif
549 }
550
551
552 /*
553 * Release a usimple_lock.
554 *
555 * Returns with preemption enabled. Note
556 * that the hw_lock routines are responsible for
557 * maintaining preemption state.
558 */
559 void
560 usimple_unlock(
561 usimple_lock_t l)
562 {
563 DECL_PC(pc);
564
565 OBTAIN_PC(pc);
566 USLDBG(usld_unlock(l, pc));
567 #if DEVELOPMENT || DEBUG
568 pltrace(TRUE);
569 #endif
570 hw_lock_unlock(&l->interlock);
571 }
572
573 /*
574 * Release a usimple_lock without re-enabling preemption.
575 *
576 * Called and returns with preemption disabled. Note
577 * that the hw_lock routines are responsible for
578 * maintaining preemption state.
579 */
580 void
581 usimple_unlock_nopreempt(
582 usimple_lock_t l)
583 {
584 DECL_PC(pc);
585
586 OBTAIN_PC(pc);
587 USLDBG(usld_unlock(l, pc));
588 #if DEVELOPMENT || DEBUG
589 pltrace(TRUE);
590 #endif
591 hw_lock_unlock_nopreempt(&l->interlock);
592 }
593
594 /*
595 * Conditionally acquire a usimple_lock.
596 *
597 * On success, returns with preemption disabled.
598 * On failure, returns with preemption in the same state
599 * as when first invoked. Note that the hw_lock routines
600 * are responsible for maintaining preemption state.
601 *
602 * XXX No stats are gathered on a miss; I preserved this
603 * behavior from the original assembly-language code, but
604 * doesn't it make sense to log misses? XXX
605 */
606 unsigned int
607 usimple_lock_try(
608 usimple_lock_t l,
609 lck_grp_t *grp)
610 {
611 unsigned int success;
612 DECL_PC(pc);
613
614 OBTAIN_PC(pc);
615 USLDBG(usld_lock_try_pre(l, pc));
616 if ((success = hw_lock_try(&l->interlock, grp))) {
617 #if DEVELOPMENT || DEBUG
618 pltrace(FALSE);
619 #endif
620 USLDBG(usld_lock_try_post(l, pc));
621 }
622 return success;
623 }
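/*
 * Illustrative sketch (editor's addition): a hypothetical caller that falls
 * back to other work when the lock is not immediately available, relying on
 * the preemption behavior described above.
 *
 *	if (usimple_lock_try(l, grp)) {
 *		... short critical section, preemption disabled ...
 *		usimple_unlock(l);
 *	} else {
 *		... lock was busy; preemption state is unchanged ...
 *	}
 */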
624
625 /*
626 * Conditionally acquire a usimple_lock.
627 *
628 * Called and returns with preemption disabled. Note
629 * that the hw_lock routines are responsible for
630 * maintaining preemption state.
631 *
632 * XXX No stats are gathered on a miss; I preserved this
633 * behavior from the original assembly-language code, but
634 * doesn't it make sense to log misses? XXX
635 */
636 static unsigned int
637 usimple_lock_try_nopreempt(
638 usimple_lock_t l,
639 lck_grp_t *grp)
640 {
641 unsigned int success;
642 DECL_PC(pc);
643
644 OBTAIN_PC(pc);
645 USLDBG(usld_lock_try_pre(l, pc));
646 if ((success = hw_lock_try_nopreempt(&l->interlock, grp))) {
647 #if DEVELOPMENT || DEBUG
648 pltrace(FALSE);
649 #endif
650 USLDBG(usld_lock_try_post(l, pc));
651 }
652 return success;
653 }
654
655 /*
656 * Acquire a usimple_lock while polling for pending cpu signals
657 * and spinning on a lock.
658 *
659 */
660 unsigned
661 int
662 (usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
663 uint64_t deadline
664 LCK_GRP_ARG(lck_grp_t *grp))
665 {
666 boolean_t istate = ml_get_interrupts_enabled();
667
668 if (deadline < mach_absolute_time()) {
669 return 0;
670 }
671
672 while (!simple_lock_try(l, grp)) {
673 if (!istate) {
674 cpu_signal_handler(NULL);
675 }
676
677 if (deadline < mach_absolute_time()) {
678 return 0;
679 }
680
681 cpu_pause();
682 }
683
684 return 1;
685 }
686
687 void
688 (usimple_lock_try_lock_loop)(usimple_lock_t l
689 LCK_GRP_ARG(lck_grp_t *grp))
690 {
691 /* When the lock is not contended, grab the lock and go. */
692 if (!simple_lock_try(l, grp)) {
693 usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
694 }
695 }
696
697 unsigned
698 int
699 (usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
700 uint64_t duration
701 LCK_GRP_ARG(lck_grp_t *grp))
702 {
703 uint64_t deadline;
704 uint64_t base_at;
705 uint64_t duration_at;
706
707 /* Fast track for uncontended locks */
708 if (simple_lock_try(l, grp)) {
709 return 1;
710 }
711
712 base_at = mach_absolute_time();
713
714 nanoseconds_to_absolutetime(duration, &duration_at);
715 deadline = base_at + duration_at;
716 if (deadline < base_at) {
717 /* deadline has overflowed, make it saturate */
718 deadline = ULLONG_MAX;
719 }
720
721 return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
722 }
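/*
 * Illustrative sketch (editor's addition): bounding the spin with the
 * duration variant above, so a wedged lock turns into a recoverable failure
 * instead of an indefinite spin. The 500us bound is a hypothetical choice.
 *
 *	if (!usimple_lock_try_lock_mp_signal_safe_loop_duration(l, 500 * NSEC_PER_USEC, grp)) {
 *		... timed out: report the stuck lock rather than spinning forever ...
 *	}
 */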
723
724 #if USLOCK_DEBUG
725 /*
726 * States of a usimple_lock. The default when initializing
727 * a usimple_lock is setting it up for debug checking.
728 */
729 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
730 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
731 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
732 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
733 #define USLOCK_CHECKING(l) (uslock_check && \
734 ((l)->debug.state & USLOCK_CHECKED))
735
736 /*
737 * Initialize the debugging information contained
738 * in a usimple_lock.
739 */
740 void
741 usld_lock_init(
742 usimple_lock_t l,
743 __unused unsigned short tag)
744 {
745 if (l == USIMPLE_LOCK_NULL) {
746 panic("lock initialization: null lock pointer");
747 }
748 l->lock_type = USLOCK_TAG;
749 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
750 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
751 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
752 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
753 l->debug.duration[0] = l->debug.duration[1] = 0;
754 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
755 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
756 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
757 }
758
759
760 /*
761 * These checks apply to all usimple_locks, not just
762 * those with USLOCK_CHECKED turned on.
763 */
764 int
765 usld_lock_common_checks(
766 usimple_lock_t l,
767 char *caller)
768 {
769 if (l == USIMPLE_LOCK_NULL) {
770 panic("%s: null lock pointer", caller);
771 }
772 if (l->lock_type != USLOCK_TAG) {
773 panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
774 }
775 if (!(l->debug.state & USLOCK_INIT)) {
776 panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
777 }
778 return USLOCK_CHECKING(l);
779 }
780
781
782 /*
783 * Debug checks on a usimple_lock just before attempting
784 * to acquire it.
785 */
786 /* ARGSUSED */
787 void
788 usld_lock_pre(
789 usimple_lock_t l,
790 pc_t pc)
791 {
792 char caller[] = "usimple_lock";
793
794
795 if (!usld_lock_common_checks(l, caller)) {
796 return;
797 }
798
799 /*
800 * Note that we have a weird case where we are getting a lock when we are
801 * in the process of putting the system to sleep. We are running with no
802 * current threads, therefore we can't tell whether we are trying to retake a lock
803 * we already hold or whether another processor holds it. Therefore we just
804 * ignore this test if the locking thread is 0.
805 */
806
807 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
808 l->debug.lock_thread == (void *) current_thread()) {
809 printf("%s: lock %p already locked (at %p) by",
810 caller, l, l->debug.lock_pc);
811 printf(" current thread %p (new attempt at pc %p)\n",
812 l->debug.lock_thread, pc);
813 panic("%s", caller);
814 }
815 mp_disable_preemption();
816 mp_enable_preemption();
817 }
818
819
820 /*
821 * Debug checks on a usimple_lock just after acquiring it.
822 *
823 * Pre-emption has been disabled at this point,
824 * so we are safe in using cpu_number.
825 */
826 void
827 usld_lock_post(
828 usimple_lock_t l,
829 pc_t pc)
830 {
831 unsigned int mycpu;
832 char caller[] = "successful usimple_lock";
833
834
835 if (!usld_lock_common_checks(l, caller)) {
836 return;
837 }
838
839 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
840 panic("%s: lock %p became uninitialized",
841 caller, l);
842 }
843 if ((l->debug.state & USLOCK_TAKEN)) {
844 panic("%s: lock 0x%p became TAKEN by someone else",
845 caller, l);
846 }
847
848 mycpu = (unsigned int)cpu_number();
849 assert(mycpu <= UCHAR_MAX);
850
851 l->debug.lock_thread = (void *)current_thread();
852 l->debug.state |= USLOCK_TAKEN;
853 l->debug.lock_pc = pc;
854 l->debug.lock_cpu = (unsigned char)mycpu;
855 }
856
857
858 /*
859 * Debug checks on a usimple_lock just before
860 * releasing it. Note that the caller has not
861 * yet released the hardware lock.
862 *
863 * Preemption is still disabled, so there's
864 * no problem using cpu_number.
865 */
866 void
867 usld_unlock(
868 usimple_lock_t l,
869 pc_t pc)
870 {
871 unsigned int mycpu;
872 char caller[] = "usimple_unlock";
873
874
875 if (!usld_lock_common_checks(l, caller)) {
876 return;
877 }
878
879 mycpu = cpu_number();
880 assert(mycpu <= UCHAR_MAX);
881
882 if (!(l->debug.state & USLOCK_TAKEN)) {
883 panic("%s: lock 0x%p hasn't been taken",
884 caller, l);
885 }
886 if (l->debug.lock_thread != (void *) current_thread()) {
887 panic("%s: unlocking lock 0x%p, owned by thread %p",
888 caller, l, l->debug.lock_thread);
889 }
890 if (l->debug.lock_cpu != mycpu) {
891 printf("%s: unlocking lock 0x%p on cpu 0x%x",
892 caller, l, mycpu);
893 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
894 panic("%s", caller);
895 }
896
897 l->debug.unlock_thread = l->debug.lock_thread;
898 l->debug.lock_thread = INVALID_THREAD;
899 l->debug.state &= ~USLOCK_TAKEN;
900 l->debug.unlock_pc = pc;
901 l->debug.unlock_cpu = (unsigned char)mycpu;
902 }
903
904
905 /*
906 * Debug checks on a usimple_lock just before
907 * attempting to acquire it.
908 *
909 * Preemption isn't guaranteed to be disabled.
910 */
911 void
912 usld_lock_try_pre(
913 usimple_lock_t l,
914 __unused pc_t pc)
915 {
916 char caller[] = "usimple_lock_try";
917
918 if (!usld_lock_common_checks(l, caller)) {
919 return;
920 }
921 }
922
923
924 /*
925 * Debug checks on a usimple_lock just after
926 * successfully attempting to acquire it.
927 *
928 * Preemption has been disabled by the
929 * lock acquisition attempt, so it's safe
930 * to use cpu_number.
931 */
932 void
933 usld_lock_try_post(
934 usimple_lock_t l,
935 pc_t pc)
936 {
937 unsigned int mycpu;
938 char caller[] = "successful usimple_lock_try";
939
940 if (!usld_lock_common_checks(l, caller)) {
941 return;
942 }
943
944 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
945 panic("%s: lock 0x%p became uninitialized",
946 caller, l);
947 }
948 if ((l->debug.state & USLOCK_TAKEN)) {
949 panic("%s: lock 0x%p became TAKEN by someone else",
950 caller, l);
951 }
952
953 mycpu = cpu_number();
954 assert(mycpu <= UCHAR_MAX);
955
956 l->debug.lock_thread = (void *) current_thread();
957 l->debug.state |= USLOCK_TAKEN;
958 l->debug.lock_pc = pc;
959 l->debug.lock_cpu = (unsigned char)mycpu;
960 }
961 #endif /* USLOCK_DEBUG */
962
963 /*
964 * Slow path routines for lck_mtx locking and unlocking functions.
965 *
966 * These functions were previously implemented in x86 assembly,
967 * and some optimizations are in place in this C code so that the compiled code
968 * is as performant and compact as the assembly version.
969 *
970 * To avoid inlining these functions on the fast path, all functions directly called by
971 * the fast paths are marked __attribute__((noinline)). They are also implemented
972 * in such a way that the fast path can tail call into them, so the return address
973 * does not need to be pushed on the caller's stack and stack optimization can happen in the caller.
974 *
975 * Slow path code is structured so that there are no calls to functions that will return
976 * in the context of the caller function, i.e. all functions called are either tail call functions
977 * or inline functions. The tail call functions take fewer than six arguments,
978 * so that they can be passed in registers and do not need to be pushed on the stack.
979 * This allows the compiler to avoid creating a stack frame for these functions.
980 *
981 * __improbable and __probable are used to compile the slow path code in such a way
982 * that the fast path case lies on a sequence of instructions with as few jumps as possible,
983 * keeping this case the most optimized even when falling through the slow path.
984 */
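/*
 * Illustrative sketch (editor's addition) of the shape described above: the
 * inline fast path ends in a tail call into a noinline slow path, so no frame
 * needs to be kept alive in the caller. All names below are hypothetical.
 *
 *	__attribute__((noinline)) static void example_slow(lck_mtx_t *lock);
 *
 *	static inline void
 *	example_fast(lck_mtx_t *lock)
 *	{
 *		if (__probable(try_cheap_acquire(lock))) {
 *			return;			// common case, no call made
 *		}
 *		return example_slow(lock);	// tail call, no frame retained
 *	}
 */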
985
986 /*
987 * Intel lock invariants:
988 *
989 * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
990 *
991 * The lock owner is promoted to the max priority of all its waiters only if it
992 * was a lower priority when it acquired or was an owner when a waiter waited.
993 * Max priority is capped at MAXPRI_PROMOTE.
994 *
995 * The last waiter will not be promoted as it is woken up, but the last
996 * lock owner may not have been the last thread to have been woken up depending on the
997 * luck of the draw. Therefore a last-owner may still have the promoted-on-wakeup
998 * flag set.
999 *
1000 * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
1001 * priority from dropping priority in the future without having to take thread lock
1002 * on acquire.
1003 */
1004
1005 /*
1006 * Routine: lck_mtx_alloc_init
1007 */
1008 lck_mtx_t *
1009 lck_mtx_alloc_init(
1010 lck_grp_t *grp,
1011 lck_attr_t *attr)
1012 {
1013 lck_mtx_t *lck;
1014
1015 lck = zalloc(KT_LCK_MTX);
1016 lck_mtx_init(lck, grp, attr);
1017 return lck;
1018 }
1019
1020 /*
1021 * Routine: lck_mtx_free
1022 */
1023 void
1024 lck_mtx_free(
1025 lck_mtx_t *lck,
1026 lck_grp_t *grp)
1027 {
1028 lck_mtx_destroy(lck, grp);
1029 zfree(KT_LCK_MTX, lck);
1030 }
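/*
 * Illustrative sketch (editor's addition): the typical lifecycle of a
 * dynamically allocated mutex using these routines. The lock group "my_grp"
 * is a hypothetical name.
 *
 *	lck_mtx_t *m = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);
 *	lck_mtx_lock(m);
 *	... may block; not for use in interrupt context ...
 *	lck_mtx_unlock(m);
 *	lck_mtx_free(m, my_grp);
 */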
1031
1032 /*
1033 * Routine: lck_mtx_init
1034 */
1035 void
1036 lck_mtx_init(
1037 lck_mtx_t *lck,
1038 lck_grp_t *grp,
1039 lck_attr_t *attr)
1040 {
1041 lck_mtx_ext_t *lck_ext = NULL;
1042
1043 if (attr == LCK_ATTR_NULL) {
1044 attr = &LockDefaultLckAttr;
1045 }
1046 if (attr->lck_attr_val & LCK_ATTR_DEBUG) {
1047 lck_ext = zalloc(KT_LCK_MTX_EXT);
1048 }
1049 lck_mtx_init_ext(lck, lck_ext, grp, attr);
1050 }
1051
1052 /*
1053 * Routine: lck_mtx_init_ext
1054 */
1055 void
1056 lck_mtx_init_ext(
1057 lck_mtx_t *lck,
1058 lck_mtx_ext_t *lck_ext,
1059 lck_grp_t *grp,
1060 lck_attr_t *attr)
1061 {
1062 if (attr == LCK_ATTR_NULL) {
1063 attr = &LockDefaultLckAttr;
1064 }
1065
1066 if (__improbable(attr->lck_attr_val & LCK_ATTR_DEBUG)) {
1067 *lck_ext = (lck_mtx_ext_t) {
1068 .lck_mtx_deb.type = MUTEX_TAG,
1069 .lck_mtx_grp = grp,
1070 .lck_mtx.lck_mtx_is_ext = 1,
1071 };
1072 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
1073 lck->lck_mtx_ptr = lck_ext;
1074 } else {
1075 lck->lck_mtx_owner = 0;
1076 lck->lck_mtx_state = 0;
1077 }
1078 lck->lck_mtx_pad32 = 0xFFFFFFFF;
1079
1080 lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
1081 }
1082
1083 static void
1084 lck_mtx_lock_mark_destroyed(
1085 lck_mtx_t *mutex,
1086 boolean_t indirect)
1087 {
1088 uint32_t state;
1089
1090 if (indirect) {
1091 /* convert to destroyed state */
1092 ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
1093 return;
1094 }
1095
1096 state = ordered_load_mtx_state(mutex);
1097 lck_mtx_interlock_lock(mutex, &state);
1098
1099 ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
1100
1101 enable_preemption();
1102 }
1103
1104 /*
1105 * Routine: lck_mtx_destroy
1106 */
1107 void
1108 lck_mtx_destroy(
1109 lck_mtx_t *lck,
1110 lck_grp_t *grp)
1111 {
1112 boolean_t indirect;
1113
1114 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
1115 return;
1116 }
1117 #if MACH_LDEBUG
1118 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
1119 #endif
1120 indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
1121
1122 lck_mtx_lock_mark_destroyed(lck, indirect);
1123
1124 if (indirect) {
1125 zfree(KT_LCK_MTX_EXT, lck->lck_mtx_ptr);
1126 }
1127 lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
1128 }
1129
1130
1131 #if DEVELOPMENT || DEBUG
1132 __attribute__((noinline))
1133 void
1134 lck_mtx_owner_check_panic(
1135 lck_mtx_t *lock)
1136 {
1137 thread_t owner = (thread_t)lock->lck_mtx_owner;
1138 panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
1139 }
1140 #endif
1141
1142 __attribute__((always_inline))
1143 static boolean_t
1144 get_indirect_mutex(
1145 lck_mtx_t **lock,
1146 uint32_t *state)
1147 {
1148 *lock = &((*lock)->lck_mtx_ptr->lck_mtx);
1149 *state = ordered_load_mtx_state(*lock);
1150 return TRUE;
1151 }
1152
1153 /*
1154 * Routine: lck_mtx_unlock_slow
1155 *
1156 * Unlocks a mutex held by current thread.
1157 *
1158 * It will wake up waiters if necessary.
1159 *
1160 * Interlock can be held.
1161 */
1162 __attribute__((noinline))
1163 void
1164 lck_mtx_unlock_slow(
1165 lck_mtx_t *lock)
1166 {
1167 thread_t thread;
1168 uint32_t state, prev;
1169 boolean_t indirect = FALSE;
1170
1171 state = ordered_load_mtx_state(lock);
1172
1173 /* Is this an indirect mutex? */
1174 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1175 indirect = get_indirect_mutex(&lock, &state);
1176 }
1177
1178 thread = current_thread();
1179
1180 #if DEVELOPMENT || DEBUG
1181 thread_t owner = (thread_t)lock->lck_mtx_owner;
1182 if (__improbable(owner != thread)) {
1183 lck_mtx_owner_check_panic(lock);
1184 }
1185 #endif
1186
1187 /* check if it is held as a spinlock */
1188 if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
1189 goto unlock;
1190 }
1191
1192 lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);
1193
1194 unlock:
1195 /* preemption disabled, interlock held and mutex not held */
1196
1197 /* clear owner */
1198 ordered_store_mtx_owner(lock, 0);
1199 /* keep original state in prev for later evaluation */
1200 prev = state;
1201
1202 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1203 #if MACH_LDEBUG
1204 if (thread) {
1205 thread->mutex_count--;
1206 }
1207 #endif
1208 return lck_mtx_unlock_wakeup_tail(lock, state, indirect);
1209 }
1210
1211 /* release interlock, promotion and clear spin flag */
1212 state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
1213 ordered_store_mtx_state_release(lock, state); /* since I own the interlock, I don't need an atomic update */
1214
1215 #if MACH_LDEBUG
1216 /* perform lock statistics after drop to prevent delay */
1217 if (thread) {
1218 thread->mutex_count--; /* lock statistic */
1219 }
1220 #endif /* MACH_LDEBUG */
1221
1222 /* re-enable preemption */
1223 lck_mtx_unlock_finish_inline(lock, FALSE);
1224
1225 return;
1226 }
1227
1228 #define LCK_MTX_LCK_WAIT_CODE 0x20
1229 #define LCK_MTX_LCK_WAKEUP_CODE 0x21
1230 #define LCK_MTX_LCK_SPIN_CODE 0x22
1231 #define LCK_MTX_LCK_ACQUIRE_CODE 0x23
1232 #define LCK_MTX_LCK_DEMOTE_CODE 0x24
1233
1234 /*
1235 * Routine: lck_mtx_unlock_wakeup_tail
1236 *
1237 * Invoked on unlock when there is
1238 * contention, i.e. the assembly routine sees
1239 * that mutex->lck_mtx_waiters != 0
1240 *
1241 * neither the mutex nor the interlock is held
1242 *
1243 * Note that this routine might not be called if there are pending
1244 * waiters which have previously been woken up, and they didn't
1245 * end up boosting the old owner.
1246 *
1247 * assembly routine previously did the following to mutex:
1248 * (after saving the state in prior_lock_state)
1249 * decremented lck_mtx_waiters if nonzero
1250 *
1251 * This function needs to be called as a tail call
1252 * to optimize the compiled code.
1253 */
1254 __attribute__((noinline))
1255 static void
1256 lck_mtx_unlock_wakeup_tail(
1257 lck_mtx_t *mutex,
1258 uint32_t state,
1259 boolean_t indirect)
1260 {
1261 struct turnstile *ts;
1262
1263 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1264 kern_return_t did_wake;
1265
1266 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
1267 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1268
1269 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1270
1271 if (mutex->lck_mtx_waiters > 1) {
1272 /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the wokenup thread */
1273 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
1274 } else {
1275 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1276 turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
1277 }
1278 assert(did_wake == KERN_SUCCESS);
1279
1280 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1281 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1282
1283 state -= LCK_MTX_WAITER;
1284 state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
1285 ordered_store_mtx_state_release(mutex, state);
1286
1287 assert(current_thread()->turnstile != NULL);
1288
1289 turnstile_cleanup();
1290
1291 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
1292 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1293
1294 lck_mtx_unlock_finish_inline(mutex, indirect);
1295 }
1296
1297 /*
1298 * Routine: lck_mtx_lock_acquire_x86
1299 *
1300 * Invoked on acquiring the mutex when there is
1301 * contention (i.e. the assembly routine sees that
1302 * mutex->lck_mtx_waiters != 0)
1303 *
1304 * mutex is owned... interlock is held... preemption is disabled
1305 */
1306 __attribute__((always_inline))
1307 static void
1308 lck_mtx_lock_acquire_inline(
1309 lck_mtx_t *mutex,
1310 struct turnstile *ts)
1311 {
1312 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1313
1314 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
1315 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1316
1317 thread_t thread = (thread_t)mutex->lck_mtx_owner; /* faster than current_thread() */
1318
1319 if (mutex->lck_mtx_waiters > 0) {
1320 if (ts == NULL) {
1321 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1322 }
1323
1324 turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1325 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1326 }
1327
1328 if (ts != NULL) {
1329 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1330 }
1331
1332 assert(current_thread()->turnstile != NULL);
1333
1334 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
1335 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1336 }
1337
1338 void
1339 lck_mtx_lock_acquire_x86(
1340 lck_mtx_t *mutex)
1341 {
1342 return lck_mtx_lock_acquire_inline(mutex, NULL);
1343 }
1344
1345 /*
1346 * Tail call helpers for lock functions that perform
1347 * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
1348 * the caller's compiled code.
1349 */
1350
1351 __attribute__((noinline))
1352 static void
1353 lck_mtx_lock_acquire_tail(
1354 lck_mtx_t *mutex,
1355 boolean_t indirect,
1356 struct turnstile *ts)
1357 {
1358 lck_mtx_lock_acquire_inline(mutex, ts);
1359 lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex), indirect);
1360 }
1361
1362 __attribute__((noinline))
1363 static boolean_t
1364 lck_mtx_try_lock_acquire_tail(
1365 lck_mtx_t *mutex)
1366 {
1367 lck_mtx_lock_acquire_inline(mutex, NULL);
1368 lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));
1369
1370 return TRUE;
1371 }
1372
1373 __attribute__((noinline))
1374 static void
1375 lck_mtx_convert_spin_acquire_tail(
1376 lck_mtx_t *mutex)
1377 {
1378 lck_mtx_lock_acquire_inline(mutex, NULL);
1379 lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
1380 }
1381
1382 boolean_t
1383 lck_mtx_ilk_unlock(
1384 lck_mtx_t *mutex)
1385 {
1386 lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
1387 return TRUE;
1388 }
1389
1390 static inline void
1391 lck_mtx_interlock_lock_set_and_clear_flags(
1392 lck_mtx_t *mutex,
1393 uint32_t xor_flags,
1394 uint32_t and_flags,
1395 uint32_t *new_state)
1396 {
1397 uint32_t state, prev;
1398 state = *new_state;
1399
1400 for (;;) {
1401 /* have to wait for interlock to clear */
1402 while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
1403 cpu_pause();
1404 state = ordered_load_mtx_state(mutex);
1405 }
1406 prev = state; /* prev contains snapshot for exchange */
1407 state |= LCK_MTX_ILOCKED_MSK | xor_flags; /* pick up interlock */
1408 state &= ~and_flags; /* clear flags */
1409
1410 disable_preemption();
1411 if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
1412 break;
1413 }
1414 enable_preemption();
1415 cpu_pause();
1416 state = ordered_load_mtx_state(mutex);
1417 }
1418 *new_state = state;
1419 return;
1420 }
1421
1422 static inline void
1423 lck_mtx_interlock_lock_clear_flags(
1424 lck_mtx_t *mutex,
1425 uint32_t and_flags,
1426 uint32_t *new_state)
1427 {
1428 return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
1429 }
1430
1431 static inline void
1432 lck_mtx_interlock_lock(
1433 lck_mtx_t *mutex,
1434 uint32_t *new_state)
1435 {
1436 return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
1437 }
1438
1439 static inline int
1440 lck_mtx_interlock_try_lock_set_flags(
1441 lck_mtx_t *mutex,
1442 uint32_t or_flags,
1443 uint32_t *new_state)
1444 {
1445 uint32_t state, prev;
1446 state = *new_state;
1447
1448 /* have to wait for interlock to clear */
1449 if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
1450 return 0;
1451 }
1452 prev = state; /* prev contains snapshot for exchange */
1453 state |= LCK_MTX_ILOCKED_MSK | or_flags; /* pick up interlock */
1454 disable_preemption();
1455 if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
1456 *new_state = state;
1457 return 1;
1458 }
1459
1460 enable_preemption();
1461 return 0;
1462 }
1463
1464 __attribute__((noinline))
1465 static void
1466 lck_mtx_lock_contended(
1467 lck_mtx_t *lock,
1468 boolean_t indirect,
1469 boolean_t *first_miss)
1470 {
1471 lck_mtx_spinwait_ret_type_t ret;
1472 uint32_t state;
1473 thread_t thread;
1474 struct turnstile *ts = NULL;
1475
1476 try_again:
1477
1478 if (indirect) {
1479 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, first_miss);
1480 }
1481
1482 ret = lck_mtx_lock_spinwait_x86(lock);
1483 state = ordered_load_mtx_state(lock);
1484 switch (ret) {
1485 case LCK_MTX_SPINWAIT_NO_SPIN:
1486 /*
1487 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
1488 * try to spin.
1489 */
1490 if (indirect) {
1491 lck_grp_mtx_update_direct_wait((struct _lck_mtx_ext_*)lock);
1492 }
1493
1494 /* just fall through case LCK_MTX_SPINWAIT_SPUN */
1495 OS_FALLTHROUGH;
1496 case LCK_MTX_SPINWAIT_SPUN_HIGH_THR:
1497 case LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE:
1498 case LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION:
1499 case LCK_MTX_SPINWAIT_SPUN_SLIDING_THR:
1500 /*
1501 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
1502 * interlock not held
1503 */
1504 lck_mtx_interlock_lock(lock, &state);
1505 assert(state & LCK_MTX_ILOCKED_MSK);
1506
1507 if (state & LCK_MTX_MLOCKED_MSK) {
1508 if (indirect) {
1509 lck_grp_mtx_update_wait((struct _lck_mtx_ext_*)lock, first_miss);
1510 }
1511 lck_mtx_lock_wait_x86(lock, &ts);
1512 /*
1513 * interlock is not held here.
1514 */
1515 goto try_again;
1516 } else {
1517 /* grab the mutex */
1518 state |= LCK_MTX_MLOCKED_MSK;
1519 ordered_store_mtx_state_release(lock, state);
1520 thread = current_thread();
1521 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1522 #if MACH_LDEBUG
1523 if (thread) {
1524 thread->mutex_count++;
1525 }
1526 #endif /* MACH_LDEBUG */
1527 }
1528
1529 break;
1530 case LCK_MTX_SPINWAIT_ACQUIRED:
1531 /*
1532 * mutex has been acquired by lck_mtx_lock_spinwait_x86
1533 * interlock is held and preemption disabled
1534 * owner is set and mutex marked as locked
1535 * statistics updated too
1536 */
1537 break;
1538 default:
1539 panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p", ret, lock);
1540 }
1541
1542 /*
1543 * interlock is already acquired here
1544 */
1545
1546 /* mutex has been acquired */
1547 thread = (thread_t)lock->lck_mtx_owner;
1548 if (state & LCK_MTX_WAITERS_MSK) {
1549 /*
1550 * lck_mtx_lock_acquire_tail will call
1551 * turnstile_complete.
1552 */
1553 return lck_mtx_lock_acquire_tail(lock, indirect, ts);
1554 }
1555
1556 if (ts != NULL) {
1557 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1558 }
1559
1560 assert(current_thread()->turnstile != NULL);
1561
1562 /* release the interlock */
1563 lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock), indirect);
1564 }
1565
1566 /*
1567 * Helper noinline functions for calling
1568 * panic to optimize compiled code.
1569 */
1570
1571 __attribute__((noinline)) __abortlike
1572 static void
1573 lck_mtx_destroyed(
1574 lck_mtx_t *lock)
1575 {
1576 panic("trying to interlock destroyed mutex (%p)", lock);
1577 }
1578
1579 __attribute__((noinline))
1580 static boolean_t
1581 lck_mtx_try_destroyed(
1582 lck_mtx_t *lock)
1583 {
1584 panic("trying to interlock destroyed mutex (%p)", lock);
1585 return FALSE;
1586 }
1587
1588 __attribute__((always_inline))
1589 static boolean_t
1590 lck_mtx_lock_wait_interlock_to_clear(
1591 lck_mtx_t *lock,
1592 uint32_t* new_state)
1593 {
1594 uint32_t state;
1595
1596 for (;;) {
1597 cpu_pause();
1598 state = ordered_load_mtx_state(lock);
1599 if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1600 *new_state = state;
1601 return TRUE;
1602 }
1603 if (state & LCK_MTX_MLOCKED_MSK) {
1604 /* if it is held as mutex, just fail */
1605 return FALSE;
1606 }
1607 }
1608 }
1609
1610 __attribute__((always_inline))
1611 static boolean_t
1612 lck_mtx_try_lock_wait_interlock_to_clear(
1613 lck_mtx_t *lock,
1614 uint32_t* new_state)
1615 {
1616 uint32_t state;
1617
1618 for (;;) {
1619 cpu_pause();
1620 state = ordered_load_mtx_state(lock);
1621 if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
1622 /* if it is held as mutex or spin, just fail */
1623 return FALSE;
1624 }
1625 if (!(state & LCK_MTX_ILOCKED_MSK)) {
1626 *new_state = state;
1627 return TRUE;
1628 }
1629 }
1630 }
1631
1632 /*
1633 * Routine: lck_mtx_lock_slow
1634 *
1635 * Locks a mutex for current thread.
1636 * If the lock is contended this function might
1637 * sleep.
1638 *
1639 * Called with interlock not held.
1640 */
1641 __attribute__((noinline))
1642 void
1643 lck_mtx_lock_slow(
1644 lck_mtx_t *lock)
1645 {
1646 boolean_t indirect = FALSE;
1647 uint32_t state;
1648 int first_miss = 0;
1649
1650 state = ordered_load_mtx_state(lock);
1651
1652 /* is the interlock or mutex held */
1653 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
1654 /*
1655 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
1656 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
1657 * set in state (state == lck_mtx_tag)
1658 */
1659
1660
1661 /* is the mutex already held and not indirect */
1662 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1663 /* no, must have been the mutex */
1664 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1665 }
1666
1667 /* check to see if it is marked destroyed */
1668 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1669 lck_mtx_destroyed(lock);
1670 }
1671
1672 /* Is this an indirect mutex? */
1673 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1674 indirect = get_indirect_mutex(&lock, &state);
1675
1676 first_miss = 0;
1677 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
1678
1679 if (state & LCK_MTX_SPIN_MSK) {
1680 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1681 assert(state & LCK_MTX_ILOCKED_MSK);
1682 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1683 }
1684 }
1685 }
1686
1687 /* no - can't be INDIRECT, DESTROYED or locked */
1688 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1689 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1690 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1691 }
1692 }
1693
1694 /* lock and interlock acquired */
1695
1696 thread_t thread = current_thread();
1697 /* record owner of mutex */
1698 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1699
1700 #if MACH_LDEBUG
1701 if (thread) {
1702 thread->mutex_count++; /* lock statistic */
1703 }
1704 #endif
1705 /*
1706 * Check if there are waiters to
1707 * inherit their priority.
1708 */
1709 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1710 return lck_mtx_lock_acquire_tail(lock, indirect, NULL);
1711 }
1712
1713 /* release the interlock */
1714 lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock), indirect);
1715
1716 return;
1717 }
1718
1719 __attribute__((noinline))
1720 boolean_t
1721 lck_mtx_try_lock_slow(
1722 lck_mtx_t *lock)
1723 {
1724 boolean_t indirect = FALSE;
1725 uint32_t state;
1726 int first_miss = 0;
1727
1728 state = ordered_load_mtx_state(lock);
1729
1730 /* is the interlock or mutex held */
1731 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
1732 /*
1733 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
1734 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
1735 * set in state (state == lck_mtx_tag)
1736 */
1737
1738 /* is the mutex already held and not indirect */
1739 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1740 return FALSE;
1741 }
1742
1743 /* check to see if it is marked destroyed */
1744 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1745 lck_mtx_try_destroyed(lock);
1746 }
1747
1748 /* Is this an indirect mutex? */
1749 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1750 indirect = get_indirect_mutex(&lock, &state);
1751
1752 first_miss = 0;
1753 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
1754 }
1755
1756 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
1757 if (indirect) {
1758 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1759 }
1760 return FALSE;
1761 }
1762 }
1763
1764 /* no - can't be INDIRECT, DESTROYED or locked */
1765 if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1766 if (indirect) {
1767 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1768 }
1769 return FALSE;
1770 }
1771
1772 /* lock and interlock acquired */
1773
1774 thread_t thread = current_thread();
1775 /* record owner of mutex */
1776 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1777
1778 #if MACH_LDEBUG
1779 if (thread) {
1780 thread->mutex_count++; /* lock statistic */
1781 }
1782 #endif
1783 /*
1784 * Check if there are waiters to
1785 * inherit their priority.
1786 */
1787 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1788 return lck_mtx_try_lock_acquire_tail(lock);
1789 }
1790
1791 /* release the interlock */
1792 lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
1793
1794 return TRUE;
1795 }
1796
1797 __attribute__((noinline))
1798 void
1799 lck_mtx_lock_spin_slow(
1800 lck_mtx_t *lock)
1801 {
1802 boolean_t indirect = FALSE;
1803 uint32_t state;
1804 int first_miss = 0;
1805
1806 state = ordered_load_mtx_state(lock);
1807
1808 /* is interlock or mutex held */
1809 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
1810 /*
1811 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
1812 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
1813 * set in state (state == lck_mtx_tag)
1814 */
1815
1816 /* is the mutex already held and not indirect */
1817 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1818 /* no, must have been the mutex */
1819 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1820 }
1821
1822 /* check to see if it is marked destroyed */
1823 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1824 lck_mtx_destroyed(lock);
1825 }
1826
1827 /* Is this an indirect mutex? */
1828 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1829 indirect = get_indirect_mutex(&lock, &state);
1830
1831 first_miss = 0;
1832 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
1833
1834 if (state & LCK_MTX_SPIN_MSK) {
1835 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1836 assert(state & LCK_MTX_ILOCKED_MSK);
1837 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1838 }
1839 }
1840 }
1841
1842 bool acquired = true;
1843
1844 #if CONFIG_DTRACE
1845 bool stat_enabled = false;
1846 uint64_t start_time = 0;
1847
1848 if (__probable(lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1849 goto past_spin;
1850 }
1851
1852 if ((lockstat_probemap[LS_LCK_MTX_LOCK_SPIN_SPIN] && !indirect)
1853 || (lockstat_probemap[LS_LCK_MTX_EXT_LOCK_SPIN_SPIN] && indirect)) {
1854 stat_enabled = true;
1855 start_time = mach_absolute_time();
1856 }
1857 #endif /* CONFIG_DTRACE */
1858
1859 /* note - can't be INDIRECT, DESTROYED or locked */
1860 /* note - preemption is not disabled while spinning */
1861 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1862 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1863 acquired = false;
1864 break;
1865 }
1866 }
1867
1868 #if CONFIG_DTRACE
1869 /*
1870 * Note that we record a different probe id depending on whether
1871 * this is a direct or indirect mutex. This allows us to
1872 * penalize only lock groups that have debug/stats enabled
1873 * with dtrace processing if desired.
1874 */
1875 if (stat_enabled) {
1876 if (__probable(!indirect)) {
1877 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_SPIN, lock,
1878 mach_absolute_time() - start_time);
1879 } else {
1880 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN_SPIN, lock,
1881 mach_absolute_time() - start_time);
1882 }
1883 }
1884
1885 past_spin:
1886 #endif /* CONFIG_DTRACE */
1887
1888 if (__improbable(!acquired)) {
1889 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1890 }
1891
1892 /* lock as spinlock and interlock acquired */
1893
1894 thread_t thread = current_thread();
1895 /* record owner of mutex */
1896 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1897
1898 #if MACH_LDEBUG
1899 if (thread) {
1900 thread->mutex_count++; /* lock statistic */
1901 }
1902 #endif
1903 #if CONFIG_DTRACE
1904 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
1905 #endif /* CONFIG_DTRACE */
1906
1907 /* return with the interlock held and preemption disabled */
1908 return;
1909 }
1910
1911 __attribute__((noinline))
1912 boolean_t
1913 lck_mtx_try_lock_spin_slow(
1914 lck_mtx_t *lock)
1915 {
1916 boolean_t indirect = FALSE;
1917 uint32_t state;
1918 int first_miss = 0;
1919
1920 state = ordered_load_mtx_state(lock);
1921
1922 /* is the interlock or mutex held */
1923 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
1924 /*
1925 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
1926 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
1927 * set in state (state == lck_mtx_tag)
1928 */
1929
1930 /* is the mutex already held and not indirect */
1931 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1932 return FALSE;
1933 }
1934
1935 /* check to see if it is marked destroyed */
1936 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1937 lck_mtx_try_destroyed(lock);
1938 }
1939
1940 /* Is this an indirect mutex? */
1941 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1942 indirect = get_indirect_mutex(&lock, &state);
1943
1944 first_miss = 0;
1945 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
1946 }
1947 }
1948
1949 /* note - can't be INDIRECT, DESTROYED or locked */
1950 if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1951 if (indirect) {
1952 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1953 }
1954 return FALSE;
1955 }
1956
1957 /* lock and interlock acquired */
1958
1959 thread_t thread = current_thread();
1960 /* record owner of mutex */
1961 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1962
1963 #if MACH_LDEBUG
1964 if (thread) {
1965 thread->mutex_count++; /* lock statistic */
1966 }
1967 #endif
1968
1969 #if CONFIG_DTRACE
1970 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_SPIN_ACQUIRE, lock, 0);
1971 #endif
1972 return TRUE;
1973 }
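
/*
 * Illustrative sketch (not part of the original source): non-blocking
 * acquisition through lck_mtx_try_lock_spin(), whose slow path is the
 * function above. The helper and argument names are hypothetical.
 */
#if 0 /* example only */
static boolean_t
example_try_bump(lck_mtx_t *lock, uint32_t *value)
{
	if (!lck_mtx_try_lock_spin(lock)) {
		return FALSE;           /* lock busy; caller retries or takes another path */
	}
	(*value)++;
	lck_mtx_unlock(lock);
	return TRUE;
}
#endif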
1974
1975 __attribute__((noinline))
1976 void
1977 lck_mtx_convert_spin(
1978 lck_mtx_t *lock)
1979 {
1980 uint32_t state;
1981
1982 state = ordered_load_mtx_state(lock);
1983
1984 /* Is this an indirect mutex? */
1985 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1986 /* If so, take indirection */
1987 get_indirect_mutex(&lock, &state);
1988 }
1989
1990 assertf((thread_t)lock->lck_mtx_owner == current_thread(), "lock %p not owned by thread %p (current owner %p)", lock, current_thread(), (thread_t)lock->lck_mtx_owner);
1991
1992 if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
1993 /* already owned as a mutex, just return */
1994 return;
1995 }
1996
1997 assert(get_preemption_level() > 0);
1998 assert(state & LCK_MTX_ILOCKED_MSK);
1999 assert(state & LCK_MTX_SPIN_MSK);
2000
2001 /*
2002 * Check if there are waiters to
2003 * inherit their priority.
2004 */
2005 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
2006 return lck_mtx_convert_spin_acquire_tail(lock);
2007 }
2008
2009 lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));
2010
2011 return;
2012 }
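
/*
 * Illustrative sketch (not part of the original source): taking the mutex in
 * spin mode for a cheap check and converting it with lck_mtx_convert_spin()
 * above before doing work that may block. The predicate and worker names are
 * hypothetical.
 */
#if 0 /* example only */
static void
example_check_then_maybe_block(lck_mtx_t *lock, bool (*needs_long_work)(void), void (*long_work)(void))
{
	lck_mtx_lock_spin(lock);              /* spin mode: interlock held, preemption off */
	if (needs_long_work()) {
		lck_mtx_convert_spin(lock);   /* become a full mutex so we are allowed to block */
		long_work();                  /* may block while holding the mutex */
	}
	lck_mtx_unlock(lock);                 /* correct for either mode */
}
#endif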
2013
2014 static inline boolean_t
2015 lck_mtx_lock_grab_mutex(
2016 lck_mtx_t *lock)
2017 {
2018 uint32_t state;
2019
2020 state = ordered_load_mtx_state(lock);
2021
2022 if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
2023 return FALSE;
2024 }
2025
2026 /* lock and interlock acquired */
2027
2028 thread_t thread = current_thread();
2029 /* record owner of mutex */
2030 ordered_store_mtx_owner(lock, (uintptr_t)thread);
2031
2032 #if MACH_LDEBUG
2033 if (thread) {
2034 thread->mutex_count++; /* lock statistic */
2035 }
2036 #endif
2037 return TRUE;
2038 }
2039
2040 __attribute__((noinline))
2041 void
2042 lck_mtx_assert(
2043 lck_mtx_t *lock,
2044 unsigned int type)
2045 {
2046 thread_t thread, owner;
2047 uint32_t state;
2048
2049 thread = current_thread();
2050 state = ordered_load_mtx_state(lock);
2051
2052 if (state == LCK_MTX_TAG_INDIRECT) {
2053 get_indirect_mutex(&lock, &state);
2054 }
2055
2056 owner = (thread_t)lock->lck_mtx_owner;
2057
2058 if (type == LCK_MTX_ASSERT_OWNED) {
2059 if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
2060 panic("mutex (%p) not owned", lock);
2061 }
2062 } else {
2063 assert(type == LCK_MTX_ASSERT_NOTOWNED);
2064 if (owner == thread) {
2065 panic("mutex (%p) owned", lock);
2066 }
2067 }
2068 }
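
/*
 * Illustrative sketch (not part of the original source): guarding an
 * ownership invariant with lck_mtx_assert() as implemented above. The
 * function name is hypothetical.
 */
#if 0 /* example only */
static void
example_requires_lock_held(lck_mtx_t *lock)
{
	/* panics with "mutex (%p) not owned" if the caller forgot to take the lock */
	lck_mtx_assert(lock, LCK_MTX_ASSERT_OWNED);
	/* ... touch state protected by `lock` ... */
}
#endif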
2069
2070 /*
2071 * Routine: lck_mtx_lock_spinwait_x86
2072 *
2073 * Invoked trying to acquire a mutex when there is contention but
2074 * the holder is running on another processor. We spin for up to a maximum
2075 * time waiting for the lock to be released.
2076 *
2077 * Called with the interlock unlocked.
2078 * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
2079 * returns one of the LCK_MTX_SPINWAIT_SPUN_* values if we spun (the suffix records why we stopped)
2080 * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
2081 */
2082 __attribute__((noinline))
2083 lck_mtx_spinwait_ret_type_t
2084 lck_mtx_lock_spinwait_x86(
2085 lck_mtx_t *mutex)
2086 {
2087 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
2088 thread_t owner, prev_owner;
2089 uint64_t window_deadline, sliding_deadline, high_deadline;
2090 uint64_t start_time, cur_time, avg_hold_time, bias, delta;
2091 lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
2092 int loopcount = 0;
2093 int total_hold_time_samples, window_hold_time_samples, unfairness;
2094 uint i, prev_owner_cpu;
2095 bool owner_on_core, adjust;
2096
2097 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
2098 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
2099
2100 start_time = mach_absolute_time();
2101 /*
2102 * window_deadline represents the "learning" phase.
2103 * The thread collects statistics about the lock until
2104 * window_deadline expires, then it decides whether to keep spinning
2105 * or to block, according to the contention behavior
2106 * observed.
2107 *
2108 * Every thread can spin at least low_MutexSpin.
2109 */
2110 window_deadline = start_time + low_MutexSpin;
2111 /*
2112 * Sliding_deadline is the adjusted spin deadline
2113 * computed after the "learning" phase.
2114 */
2115 sliding_deadline = window_deadline;
2116 /*
2117 * High_deadline is a hard deadline: no thread
2118 * can spin beyond it.
2119 */
2120 if (high_MutexSpin >= 0) {
2121 high_deadline = start_time + high_MutexSpin;
2122 } else {
2123 high_deadline = start_time + low_MutexSpin * real_ncpus;
2124 }
2125
2126 /*
2127 * We do not yet know which cpu the owner is running on.
2128 * Initialize prev_owner_cpu with the next cpu.
2129 */
2130 prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
2131 total_hold_time_samples = 0;
2132 window_hold_time_samples = 0;
2133 avg_hold_time = 0;
2134 adjust = TRUE;
2135 bias = (os_hash_kernel_pointer(mutex) + cpu_number()) % real_ncpus;
2136
2137 prev_owner = (thread_t) mutex->lck_mtx_owner;
2138 /*
2139 * Spin while:
2140 * - mutex is locked, and
2141 * - it's locked as a spin lock, and
2142 * - owner is running on another processor, and
2143 * - we haven't spun for long enough.
2144 */
2145 do {
2146 /*
2147 * Try to acquire the lock.
2148 */
2149 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
2150 retval = LCK_MTX_SPINWAIT_ACQUIRED;
2151 break;
2152 }
2153
2154 cur_time = mach_absolute_time();
2155
2156 /*
2157 * Never spin past high_deadline.
2158 */
2159 if (cur_time >= high_deadline) {
2160 retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
2161 break;
2162 }
2163
2164 /*
2165 * Check whether the owner is running on a core. If not, stop spinning and block.
2166 */
2167 owner = (thread_t) mutex->lck_mtx_owner;
2168 if (owner) {
2169 i = prev_owner_cpu;
2170 owner_on_core = FALSE;
2171
2172 disable_preemption();
2173 owner = (thread_t) mutex->lck_mtx_owner;
2174
2175 /*
2176 * For scalability we want to check if the owner is on core
2177 * without locking the mutex interlock.
2178 * If we do not lock the mutex interlock, the owner that we see might be
2179 * invalid, so we cannot dereference it. Therefore we cannot check
2180 * any field of the thread to tell us if it is on core.
2181 * Check if the thread that is running on the other cpus matches the owner.
2182 */
2183 if (owner) {
2184 do {
2185 if ((cpu_data_ptr[i] != NULL) && (cpu_data_ptr[i]->cpu_active_thread == owner)) {
2186 owner_on_core = TRUE;
2187 break;
2188 }
2189 if (++i >= real_ncpus) {
2190 i = 0;
2191 }
2192 } while (i != prev_owner_cpu);
2193 enable_preemption();
2194
2195 if (owner_on_core) {
2196 prev_owner_cpu = i;
2197 } else {
2198 prev_owner = owner;
2199 owner = (thread_t) mutex->lck_mtx_owner;
2200 if (owner == prev_owner) {
2201 /*
2202 * Owner is not on core.
2203 * Stop spinning.
2204 */
2205 if (loopcount == 0) {
2206 retval = LCK_MTX_SPINWAIT_NO_SPIN;
2207 } else {
2208 retval = LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE;
2209 }
2210 break;
2211 }
2212 /*
2213 * Fall through if the owner changed while we were scanning.
2214 * The new owner could potentially be on core, so loop
2215 * again.
2216 */
2217 }
2218 } else {
2219 enable_preemption();
2220 }
2221 }
2222
2223 /*
2224 * Count how many times we see the owner change.
2225 * From that we can roughly estimate the mutex hold
2226 * time and the fairness of the lock.
2227 */
2228 if (owner != prev_owner) {
2229 prev_owner = owner;
2230 total_hold_time_samples++;
2231 window_hold_time_samples++;
2232 }
2233
2234 /*
2235 * Learning window expired.
2236 * Try to adjust the sliding_deadline.
2237 */
2238 if (cur_time >= window_deadline) {
2239 /*
2240 * If there was no contention during the window,
2241 * stop spinning.
2242 */
2243 if (window_hold_time_samples < 1) {
2244 retval = LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION;
2245 break;
2246 }
2247
2248 if (adjust) {
2249 /*
2250 * For a fair lock, we'd wait for at most (NCPU-1) periods,
2251 * but the lock is unfair, so let's try to estimate by how much.
2252 */
2253 unfairness = total_hold_time_samples / real_ncpus;
2254
2255 if (unfairness == 0) {
2256 /*
2257 * We observed the owner changing `total_hold_time_samples` times which
2258 * let us estimate the average hold time of this mutex for the duration
2259 * of the spin time.
2260 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
2261 *
2262 * In this case spin at max avg_hold_time * (real_ncpus - 1)
2263 */
2264 delta = cur_time - start_time;
2265 sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
2266 } else {
2267 /*
2268 * In this case at least one of the other cpus was able to get the lock twice
2269 * while I was spinning.
2270 * We could spin longer but it won't necessarily help if the system is unfair.
2271 * Try to randomize the wait to reduce contention.
2272 *
2273 * We compute how much time we could potentially spin
2274 * and distribute it over the cpus.
2275 *
2276 * bias is an integer between 0 and real_ncpus - 1.
2277 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
2278 */
2279 delta = high_deadline - cur_time;
2280 sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
2281 adjust = FALSE;
2282 }
2283 }
2284
2285 window_deadline += low_MutexSpin;
2286 window_hold_time_samples = 0;
2287 }
2288
2289 /*
2290 * Stop spinning if we are past
2291 * the adjusted deadline.
2292 */
2293 if (cur_time >= sliding_deadline) {
2294 retval = LCK_MTX_SPINWAIT_SPUN_SLIDING_THR;
2295 break;
2296 }
2297
2298 if ((thread_t) mutex->lck_mtx_owner != NULL) {
2299 cpu_pause();
2300 }
2301
2302 loopcount++;
2303 } while (TRUE);
2304
2305 #if CONFIG_DTRACE
2306 /*
2307 * Note that we record a different probe id depending on whether
2308 * this is a direct or indirect mutex. This allows us to
2309 * penalize only lock groups that have debug/stats enabled
2310 * with dtrace processing if desired.
2311 */
2312 if (__probable(mutex->lck_mtx_is_ext == 0)) {
2313 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ADAPTIVE_SPIN, mutex,
2314 mach_absolute_time() - start_time);
2315 } else {
2316 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ADAPTIVE_SPIN, mutex,
2317 mach_absolute_time() - start_time);
2318 }
2319 /* The lockstat acquire event is recorded by the assembly code beneath us. */
2320 #endif
2321
2322 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2323 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
2324
2325 return retval;
2326 }
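
/*
 * Illustrative sketch (not part of the original source): the sliding-deadline
 * adjustment performed inside lck_mtx_lock_spinwait_x86(), pulled out as a
 * standalone helper for clarity. All names below are hypothetical; the real
 * logic lives inline in the loop above.
 */
#if 0 /* example only */
static uint64_t
example_adjust_sliding_deadline(uint64_t start_time, uint64_t cur_time,
    uint64_t high_deadline, uint64_t bias, unsigned int ncpus,
    int total_hold_time_samples)
{
	int unfairness = total_hold_time_samples / (int)ncpus;
	uint64_t delta;

	if (unfairness == 0) {
		/*
		 * The owner changed total_hold_time_samples times, so the average
		 * hold time is (cur_time - start_time) / total_hold_time_samples.
		 * Spin for at most (ncpus - 1) average hold times past start_time.
		 */
		delta = cur_time - start_time;
		return start_time + (delta * (ncpus - 1)) / total_hold_time_samples;
	}

	/*
	 * The lock is behaving unfairly: spread the remaining spin budget
	 * across the cpus using the per-lock/per-cpu bias.
	 */
	delta = high_deadline - cur_time;
	return cur_time + (delta * bias) / ncpus;
}
#endif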
2327
2328
2329
2330 /*
2331 * Routine: lck_mtx_lock_wait_x86
2332 *
2333 * Invoked in order to wait on contention.
2334 *
2335 * Called with the interlock locked and
2336 * preemption disabled...
2337 * returns it unlocked and with preemption enabled
2338 *
2339 * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
2340 * A runnable waiter can exist between wait and acquire
2341 * without a waiters count being set.
2342 * This allows us to never make a spurious wakeup call.
2343 *
2344 * Priority:
2345 * This avoids taking the thread lock if the owning thread is the same priority.
2346 * This optimizes the case of same-priority threads contending on a lock.
2347 * However, that allows the owning thread to drop in priority while holding the lock,
2348 * because there is no state that the priority change can notice that
2349 * says that the targeted thread holds a contended mutex.
2350 *
2351 * One possible solution: priority changes could look for some atomic tag
2352 * on the thread saying 'holding contended lock', and then set up a promotion.
2353 * Needs a story for dropping that promotion - the last contended unlock
2354 * has to notice that this has happened.
2355 */
2356 __attribute__((noinline))
2357 void
2358 lck_mtx_lock_wait_x86(
2359 lck_mtx_t *mutex,
2360 struct turnstile **ts)
2361 {
2362 thread_t self = current_thread();
2363
2364 #if CONFIG_DTRACE
2365 uint64_t sleep_start = 0;
2366
2367 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
2368 sleep_start = mach_absolute_time();
2369 }
2370 #endif
2371 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
2372
2373 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
2374 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
2375 mutex->lck_mtx_waiters, 0, 0);
2376
2377 mutex->lck_mtx_waiters++;
2378
2379 thread_t holder = (thread_t)mutex->lck_mtx_owner;
2380 assert(holder != NULL);
2381
2382 /*
2383 * lck_mtx_lock_wait_x86 might be called in a loop. Call turnstile_prepare just once and
2384 * reuse the same turnstile while looping; the matching turnstile_complete will be called
2385 * by lck_mtx_lock_contended when the lock is finally acquired.
2386 */
2387 if (*ts == NULL) {
2388 *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2389 }
2390
2391 struct turnstile *turnstile = *ts;
2392 thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
2393 turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
2394
2395 waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
2396
2397 lck_mtx_ilk_unlock(mutex);
2398
2399 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
2400
2401 thread_block(THREAD_CONTINUE_NULL);
2402
2403 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
2404 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
2405 mutex->lck_mtx_waiters, 0, 0);
2406
2407 #if CONFIG_DTRACE
2408 /*
2409 * Record the DTrace lockstat probe for blocking; the block time
2410 * is measured from when this function was entered.
2411 */
2412 if (sleep_start) {
2413 if (mutex->lck_mtx_is_ext == 0) {
2414 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
2415 mach_absolute_time() - sleep_start);
2416 } else {
2417 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
2418 mach_absolute_time() - sleep_start);
2419 }
2420 }
2421 #endif
2422 }
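
/*
 * Illustrative sketch (not part of the original source): the prepare-once,
 * wait-in-a-loop pattern that callers of lck_mtx_lock_wait_x86() follow, as
 * described in the comment above. This is a simplified approximation of what
 * lck_mtx_lock_contended() does; interlock handling and the final
 * turnstile_complete() call are elided.
 */
#if 0 /* example only */
static void
example_contended_lock_loop(lck_mtx_t *mutex)
{
	struct turnstile *ts = NULL;    /* prepared lazily by lck_mtx_lock_wait_x86() */

	for (;;) {
		if (lck_mtx_lock_grab_mutex(mutex)) {
			break;          /* mutex acquired (interlock held on return) */
		}
		/* the real code takes the interlock and re-checks the state before waiting; elided */
		lck_mtx_lock_wait_x86(mutex, &ts);  /* blocks; returns with the interlock dropped */
	}
	/* the real code calls turnstile_complete() exactly once here if ts != NULL */
}
#endif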
2423
2424 /*
2425 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2426 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2427 * Returns: TRUE if lock is acquired.
2428 */
2429 boolean_t
2430 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2431 {
2432 if (not_in_kdp) {
2433 panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
2434 }
2435
2436 if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
2437 return TRUE;
2438 }
2439
2440 return FALSE;
2441 }
2442
2443 void
2444 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2445 {
2446 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2447 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2448 thread_t holder = (thread_t)mutex->lck_mtx_owner;
2449 waitinfo->owner = thread_tid(holder);
2450 }
2451