/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator or [email protected]
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	kern/lock.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Locking primitives implementation
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/cpu_data.h>
#include <kern/cpu_number.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <string.h>

#include <i386/machine_routines.h> /* machine_timeout_suspended() */
#include <machine/atomic.h>
#include <machine/machine_cpu.h>
#include <i386/mp.h>
#include <sys/kdebug.h>
#include <i386/locks_i386_inlines.h>
#include <os/hash.h>

#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

uint64_t _Atomic lock_panic_timeout = 0xf000000; /* 251e6 TSC ticks */

/* Forwards */

#if USLOCK_DEBUG
/*
 * Perform simple lock checks.
 */
int uslock_check = 1;
int max_lock_loops = 100000000;
decl_simple_lock_data(extern, printf_lock);
decl_simple_lock_data(extern, panic_lock);
#endif  /* USLOCK_DEBUG */

extern unsigned int not_in_kdp;

#if !LOCK_STATS
#define usimple_lock_nopreempt(lck, grp) \
	usimple_lock_nopreempt(lck)
#define usimple_lock_try_nopreempt(lck, grp) \
	usimple_lock_try_nopreempt(lck)
#endif
static void usimple_lock_nopreempt(usimple_lock_t, lck_grp_t *);
static unsigned int usimple_lock_try_nopreempt(usimple_lock_t, lck_grp_t *);

/*
 * We often want to know the addresses of the callers
 * of the various lock routines. However, this information
 * is only used for debugging and statistics.
 */
typedef void *pc_t;
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
#if ANY_LOCK_DEBUG
#define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
#define DECL_PC(pc)     pc_t pc;
#else   /* ANY_LOCK_DEBUG */
#define DECL_PC(pc)
#ifdef lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc)   ++pc
#else   /* lint */
#define OBTAIN_PC(pc)
#endif  /* lint */
#endif  /* ANY_LOCK_DEBUG */

KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);

KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);

KALLOC_TYPE_DEFINE(KT_LCK_MTX_EXT, lck_mtx_ext_t, KT_PRIV_ACCT);

/*
 * The atomic exchange API is a low-level abstraction of the operations
 * to atomically read, modify, and write a pointer. This abstraction works
 * for both Intel and ARMv8.1 compare-and-exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
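
/*
 * Illustrative sketch (not code from this file) of the begin/complete
 * retry pattern this API implies; `flags` is a hypothetical 32-bit word:
 *
 *	uint32_t prev, value;
 *	for (;;) {
 *		value = atomic_exchange_begin32(&flags, &prev, memory_order_acquire);
 *		value |= 0x1;
 *		if (atomic_exchange_complete32(&flags, prev, value, memory_order_acquire)) {
 *			break;
 *		}
 *		cpu_pause();
 *	}
 *
 * atomic_test_and_set32() below is a real instance of this pattern.
 */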
uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t val;

	(void)ord;  // Memory order not used
	val = os_atomic_load(target, relaxed);
	*previous = val;
	return val;
}

boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
}

void
atomic_exchange_abort(void)
{
}

boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait) {
				cpu_pause();
			} else {
				atomic_exchange_abort();
			}
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
	}
}

/*
 * Portable lock package implementation of usimple_locks.
 */

#if USLOCK_DEBUG
#define USLDBG(stmt)    stmt
void usld_lock_init(usimple_lock_t, unsigned short);
void usld_lock_pre(usimple_lock_t, pc_t);
void usld_lock_post(usimple_lock_t, pc_t);
void usld_unlock(usimple_lock_t, pc_t);
void usld_lock_try_pre(usimple_lock_t, pc_t);
void usld_lock_try_post(usimple_lock_t, pc_t);
int usld_lock_common_checks(usimple_lock_t, char *);
#else   /* USLOCK_DEBUG */
#define USLDBG(stmt)
#endif  /* USLOCK_DEBUG */

/*
 * Forward definitions
 */

static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state, boolean_t indirect);
static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);


/*
 * Routine:	lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(
	lck_grp_t  *grp,
	lck_attr_t *attr)
{
	lck_spin_t *lck;

	lck = zalloc(KT_LCK_SPIN);
	lck_spin_init(lck, grp, attr);
	return lck;
}

/*
 * Routine:	lck_spin_free
 */
void
lck_spin_free(
	lck_spin_t *lck,
	lck_grp_t  *grp)
{
	lck_spin_destroy(lck, grp);
	zfree(KT_LCK_SPIN, lck);
}
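
/*
 * Typical lifecycle of a dynamically allocated spin lock (illustrative
 * sketch; `my_grp` stands for an assumed caller-provided lock group):
 *
 *	lck_spin_t *sl = lck_spin_alloc_init(my_grp, LCK_ATTR_NULL);
 *	lck_spin_lock(sl);
 *	... critical section, runs with preemption disabled ...
 *	lck_spin_unlock(sl);
 *	lck_spin_free(sl, my_grp);
 */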

/*
 * Routine:	lck_spin_init
 */
void
lck_spin_init(
	lck_spin_t          *lck,
	lck_grp_t           *grp,
	__unused lck_attr_t *attr)
{
	usimple_lock_init((usimple_lock_t) lck, 0);
	if (grp) {
		lck_grp_reference(grp);
		lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
	}
}

/*
 * Routine:	lck_spin_destroy
 */
void
lck_spin_destroy(
	lck_spin_t *lck,
	lck_grp_t  *grp)
{
	if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
		return;
	}
	lck->interlock = LCK_SPIN_TAG_DESTROYED;
	if (grp) {
		lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
		lck_grp_deallocate(grp);
	}
	return;
}

/*
 * Routine:	lck_spin_lock
 */
void
lck_spin_lock_grp(
	lck_spin_t *lck,
	lck_grp_t  *grp)
{
#pragma unused(grp)
	usimple_lock((usimple_lock_t) lck, grp);
}

void
lck_spin_lock(
	lck_spin_t *lck)
{
	usimple_lock((usimple_lock_t) lck, NULL);
}

void
lck_spin_lock_nopreempt(
	lck_spin_t *lck)
{
	usimple_lock_nopreempt((usimple_lock_t) lck, NULL);
}

void
lck_spin_lock_nopreempt_grp(
	lck_spin_t *lck,
	lck_grp_t  *grp)
{
#pragma unused(grp)
	usimple_lock_nopreempt((usimple_lock_t) lck, grp);
}

/*
 * Routine:	lck_spin_unlock
 */
void
lck_spin_unlock(
	lck_spin_t *lck)
{
	usimple_unlock((usimple_lock_t) lck);
}

void
lck_spin_unlock_nopreempt(
	lck_spin_t *lck)
{
	usimple_unlock_nopreempt((usimple_lock_t) lck);
}

boolean_t
lck_spin_try_lock_grp(
	lck_spin_t *lck,
	lck_grp_t  *grp)
{
#pragma unused(grp)
	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
#if DEVELOPMENT || DEBUG
	if (lrval) {
		pltrace(FALSE);
	}
#endif
	return lrval;
}


/*
 * Routine:	lck_spin_try_lock
 */
boolean_t
lck_spin_try_lock(
	lck_spin_t *lck)
{
	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
#if DEVELOPMENT || DEBUG
	if (lrval) {
		pltrace(FALSE);
	}
#endif
	return lrval;
}

int
lck_spin_try_lock_nopreempt(
	lck_spin_t *lck)
{
	boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, LCK_GRP_NULL);
#if DEVELOPMENT || DEBUG
	if (lrval) {
		pltrace(FALSE);
	}
#endif
	return lrval;
}

int
lck_spin_try_lock_nopreempt_grp(
	lck_spin_t *lck,
	lck_grp_t  *grp)
{
#pragma unused(grp)
	boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, grp);
#if DEVELOPMENT || DEBUG
	if (lrval) {
		pltrace(FALSE);
	}
#endif
	return lrval;
}

/*
 * Routine:	lck_spin_assert
 */
void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	thread_t thread, holder;
	uintptr_t state;

	if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}

	state = lock->interlock;
	holder = (thread_t)state;
	thread = current_thread();
	if (type == LCK_ASSERT_OWNED) {
		if (__improbable(holder == THREAD_NULL)) {
			panic("Lock not owned %p = %lx", lock, state);
		}
		if (__improbable(holder != thread)) {
			panic("Lock not owned by current thread %p = %lx", lock, state);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (__improbable(holder != THREAD_NULL)) {
			if (holder == thread) {
				panic("Lock owned by current thread %p = %lx", lock, state);
			}
		}
	}
}
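
/*
 * Example (sketch): a routine that requires the lock to be held would
 * typically assert ownership on entry, where `my_lock` stands for any
 * lck_spin_t the caller owns:
 *
 *	lck_spin_assert(&my_lock, LCK_ASSERT_OWNED);
 */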

/*
 * Routine:	kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 * Returns: TRUE if lock is acquired.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck)
{
	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	return (lck->interlock != 0) ? TRUE : FALSE;
}

/*
 * Initialize a usimple_lock.
 *
 * No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t          l,
	__unused unsigned short tag)
{
	USLDBG(usld_lock_init(l, tag));
	hw_lock_init(&l->interlock);
}

static hw_lock_timeout_status_t
usimple_lock_acquire_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
{
#pragma unused(interrupt_time)

	usimple_lock_t l = _lock;
	uintptr_t lowner;
	lck_spinlock_to_info_t lsti;

	if (machine_timeout_suspended()) {
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	lowner = (uintptr_t)l->interlock.lock_data;
	lsti = lck_spinlock_timeout_hit(l, lowner);

	panic("Spinlock acquisition timed out: lock=%p, "
	    "lock owner thread=0x%lx, current_thread: %p, "
	    "lock owner active on CPU %d, current owner: 0x%lx, "
#if INTERRUPT_MASKED_DEBUG
	    "interrupt time: %llu, "
#endif /* INTERRUPT_MASKED_DEBUG */
	    "spin time: %llu, start time: %llu, now: %llu, timeout: %llu",
	    l, lowner, current_thread(), lsti->owner_cpu,
	    (uintptr_t)l->interlock.lock_data,
#if INTERRUPT_MASKED_DEBUG
	    interrupt_time,
#endif /* INTERRUPT_MASKED_DEBUG */
	    now - start, start, now, timeout);
}

/*
 * Acquire a usimple_lock.
 *
 * Returns with preemption disabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
(usimple_lock)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));

	(void)hw_lock_to(&l->interlock, LockTimeOutTSC,
	    usimple_lock_acquire_timeout_panic, grp);
#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}

/*
 * Acquire a usimple_lock_nopreempt.
 *
 * Called and returns with preemption disabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
static void
usimple_lock_nopreempt(
	usimple_lock_t l,
	lck_grp_t      *grp)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));

	(void)hw_lock_to_nopreempt(&l->interlock, LockTimeOutTSC,
	    usimple_lock_acquire_timeout_panic, grp);

#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}


/*
 * Release a usimple_lock.
 *
 * Returns with preemption enabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
usimple_unlock(
	usimple_lock_t l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	hw_lock_unlock(&l->interlock);
}

/*
 * Release a usimple_lock_nopreempt.
 *
 * Called and returns with preemption enabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
usimple_unlock_nopreempt(
	usimple_lock_t l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	hw_lock_unlock_nopreempt(&l->interlock);
}
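
/*
 * The _nopreempt variants leave preemption management to the caller,
 * e.g. (illustrative sketch using the lck_spin wrappers above):
 *
 *	disable_preemption();
 *	lck_spin_lock_nopreempt(lck);
 *	... critical section ...
 *	lck_spin_unlock_nopreempt(lck);
 *	enable_preemption();
 */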

/*
 * Conditionally acquire a usimple_lock.
 *
 * On success, returns with preemption disabled.
 * On failure, returns with preemption in the same state
 * as when first invoked. Note that the hw_lock routines
 * are responsible for maintaining preemption state.
 *
 * XXX No stats are gathered on a miss; I preserved this
 * behavior from the original assembly-language code, but
 * doesn't it make sense to log misses?  XXX
 */
unsigned int
usimple_lock_try(
	usimple_lock_t l,
	lck_grp_t      *grp)
{
	unsigned int success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	if ((success = hw_lock_try(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
		pltrace(FALSE);
#endif
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
}
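
/*
 * Example try-lock pattern (sketch): back off instead of spinning when
 * the lock is contended:
 *
 *	if (usimple_lock_try(&l, grp)) {
 *		... critical section ...
 *		usimple_unlock(&l);
 *	} else {
 *		... lock was busy; do not touch the protected state ...
 *	}
 */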

/*
 * Conditionally acquire a usimple_lock.
 *
 * Called and returns with preemption disabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 *
 * XXX No stats are gathered on a miss; I preserved this
 * behavior from the original assembly-language code, but
 * doesn't it make sense to log misses?  XXX
 */
static unsigned int
usimple_lock_try_nopreempt(
	usimple_lock_t l,
	lck_grp_t      *grp)
{
	unsigned int success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	if ((success = hw_lock_try_nopreempt(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
		pltrace(FALSE);
#endif
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
}

/*
 * Acquire a usimple_lock while polling for pending cpu signals
 * and spinning on a lock.
 */
unsigned int
(usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
    uint64_t deadline
    LCK_GRP_ARG(lck_grp_t *grp))
{
	boolean_t istate = ml_get_interrupts_enabled();

	if (deadline < mach_absolute_time()) {
		return 0;
	}

	while (!simple_lock_try(l, grp)) {
		if (!istate) {
			cpu_signal_handler(NULL);
		}

		if (deadline < mach_absolute_time()) {
			return 0;
		}

		cpu_pause();
	}

	return 1;
}
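
/*
 * Example (sketch): spin for at most one millisecond from now, servicing
 * pending cross-CPU signals while interrupts are disabled:
 *
 *	uint64_t one_ms;
 *	nanoseconds_to_absolutetime(NSEC_PER_MSEC, &one_ms);
 *	if (!usimple_lock_try_lock_mp_signal_safe_loop_deadline(l,
 *	        mach_absolute_time() + one_ms, grp)) {
 *		... deadline passed without acquiring the lock ...
 *	}
 */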

void
(usimple_lock_try_lock_loop)(usimple_lock_t l
    LCK_GRP_ARG(lck_grp_t *grp))
{
	/* When the lock is not contended, grab the lock and go. */
	if (!simple_lock_try(l, grp)) {
		usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
	}
}

unsigned int
(usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
    uint64_t duration
    LCK_GRP_ARG(lck_grp_t *grp))
{
	uint64_t deadline;
	uint64_t base_at;
	uint64_t duration_at;

	/* Fast track for uncontended locks */
	if (simple_lock_try(l, grp)) {
		return 1;
	}

	base_at = mach_absolute_time();

	nanoseconds_to_absolutetime(duration, &duration_at);
	deadline = base_at + duration_at;
	if (deadline < base_at) {
		/* deadline has overflowed, make it saturate */
		deadline = ULLONG_MAX;
	}

	return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
}
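
/*
 * Example (sketch): the duration variant performs the nanoseconds-to-
 * absolute-time conversion internally, so a caller simply passes a
 * duration in nanoseconds:
 *
 *	if (!usimple_lock_try_lock_mp_signal_safe_loop_duration(l,
 *	        NSEC_PER_MSEC, grp)) {
 *		... timed out ...
 *	}
 */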

#if USLOCK_DEBUG
/*
 * States of a usimple_lock. The default when initializing
 * a usimple_lock is setting it up for debug checking.
 */
#define USLOCK_CHECKED          0x0001  /* lock is being checked */
#define USLOCK_TAKEN            0x0002  /* lock has been taken */
#define USLOCK_INIT             0xBAA0  /* lock has been initialized */
#define USLOCK_INITIALIZED      (USLOCK_INIT | USLOCK_CHECKED)
#define USLOCK_CHECKING(l)      (uslock_check && \
	((l)->debug.state & USLOCK_CHECKED))

/*
 * Initialize the debugging information contained
 * in a usimple_lock.
 */
void
usld_lock_init(
	usimple_lock_t          l,
	__unused unsigned short tag)
{
	if (l == USIMPLE_LOCK_NULL) {
		panic("lock initialization: null lock pointer");
	}
	l->lock_type = USLOCK_TAG;
	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
	l->debug.duration[0] = l->debug.duration[1] = 0;
}


/*
 * These checks apply to all usimple_locks, not just
 * those with USLOCK_CHECKED turned on.
 */
int
usld_lock_common_checks(
	usimple_lock_t l,
	char           *caller)
{
	if (l == USIMPLE_LOCK_NULL) {
		panic("%s: null lock pointer", caller);
	}
	if (l->lock_type != USLOCK_TAG) {
		panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
	}
	if (!(l->debug.state & USLOCK_INIT)) {
		panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
	}
	return USLOCK_CHECKING(l);
}


/*
 * Debug checks on a usimple_lock just before attempting
 * to acquire it.
 */
/* ARGSUSED */
void
usld_lock_pre(
	usimple_lock_t l,
	pc_t           pc)
{
	char caller[] = "usimple_lock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	/*
	 * Note that we have a weird case where we are getting a lock when we are
	 * in the process of putting the system to sleep. We are running with no
	 * current threads, therefore we can't tell whether we are trying to retake
	 * a lock we already hold or whether another processor holds it. Therefore
	 * we just ignore this test if the locking thread is 0.
	 */

	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s: lock %p already locked (at %p) by",
		    caller, l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		    l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	mp_disable_preemption();
	mp_enable_preemption();
}


/*
 * Debug checks on a usimple_lock just after acquiring it.
 *
 * Pre-emption has been disabled at this point,
 * so we are safe in using cpu_number.
 */
void
usld_lock_post(
	usimple_lock_t l,
	pc_t           pc)
{
	unsigned int mycpu;
	char caller[] = "successful usimple_lock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
		panic("%s: lock %p became uninitialized",
		    caller, l);
	}
	if ((l->debug.state & USLOCK_TAKEN)) {
		panic("%s: lock 0x%p became TAKEN by someone else",
		    caller, l);
	}

	mycpu = (unsigned int)cpu_number();
	assert(mycpu <= UCHAR_MAX);

	l->debug.lock_thread = (void *)current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = (unsigned char)mycpu;
}


/*
 * Debug checks on a usimple_lock just before
 * releasing it. Note that the caller has not
 * yet released the hardware lock.
 *
 * Preemption is still disabled, so there's
 * no problem using cpu_number.
 */
void
usld_unlock(
	usimple_lock_t l,
	pc_t           pc)
{
	unsigned int mycpu;
	char caller[] = "usimple_unlock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	mycpu = cpu_number();
	assert(mycpu <= UCHAR_MAX);

	if (!(l->debug.state & USLOCK_TAKEN)) {
		panic("%s: lock 0x%p hasn't been taken",
		    caller, l);
	}
	if (l->debug.lock_thread != (void *) current_thread()) {
		panic("%s: unlocking lock 0x%p, owned by thread %p",
		    caller, l, l->debug.lock_thread);
	}
	if (l->debug.lock_cpu != mycpu) {
		printf("%s: unlocking lock 0x%p on cpu 0x%x",
		    caller, l, mycpu);
		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
		panic("%s", caller);
	}

	l->debug.unlock_thread = l->debug.lock_thread;
	l->debug.lock_thread = INVALID_THREAD;
	l->debug.state &= ~USLOCK_TAKEN;
	l->debug.unlock_pc = pc;
	l->debug.unlock_cpu = (unsigned char)mycpu;
}


/*
 * Debug checks on a usimple_lock just before
 * attempting to acquire it.
 *
 * Preemption isn't guaranteed to be disabled.
 */
void
usld_lock_try_pre(
	usimple_lock_t l,
	__unused pc_t  pc)
{
	char caller[] = "usimple_lock_try";

	if (!usld_lock_common_checks(l, caller)) {
		return;
	}
}


/*
 * Debug checks on a usimple_lock just after
 * successfully attempting to acquire it.
 *
 * Preemption has been disabled by the
 * lock acquisition attempt, so it's safe
 * to use cpu_number.
 */
void
usld_lock_try_post(
	usimple_lock_t l,
	pc_t           pc)
{
	unsigned int mycpu;
	char caller[] = "successful usimple_lock_try";

	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
		panic("%s: lock 0x%p became uninitialized",
		    caller, l);
	}
	if ((l->debug.state & USLOCK_TAKEN)) {
		panic("%s: lock 0x%p became TAKEN by someone else",
		    caller, l);
	}

	mycpu = cpu_number();
	assert(mycpu <= UCHAR_MAX);

	l->debug.lock_thread = (void *) current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = (unsigned char)mycpu;
}
#endif /* USLOCK_DEBUG */

/*
 * Slow path routines for lck_mtx locking and unlocking functions.
 *
 * These functions were previously implemented in x86 assembly,
 * and some optimizations are in place in this C code so that the compiled
 * code is as performant and compact as the assembly version.
 *
 * To avoid inlining these functions on the fast path, all functions directly
 * called by the fast paths are marked __attribute__((noinline)). They are also
 * all implemented in such a way that the fast path can tail call into them. In
 * this way the return address does not need to be pushed on the caller stack
 * and stack optimization can happen on the caller.
 *
 * Slow path code is structured so that there are no calls to functions that
 * will return in the context of the caller function, i.e. all functions called
 * are either tail-call functions or inline functions. The tail-call functions
 * take fewer than six arguments, so that the arguments can be passed in
 * registers and do not need to be pushed on the stack. This allows the
 * compiler to avoid creating a stack frame for the functions.
 *
 * __improbable and __probable are used to compile the slow path code in such
 * a way that the fast path case is a sequence of instructions with as few
 * jumps as possible, making this case the most optimized one even when
 * falling through to the slow path.
 */
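
/*
 * As a sketch of the shape described above (not actual code from this
 * file), a fast path tail-calling into a noinline slow path looks like:
 *
 *	void
 *	lck_mtx_lock(lck_mtx_t *lock)
 *	{
 *		if (__probable(fast_path_cmpxchg_succeeds(lock))) {
 *			return;
 *		}
 *		return lck_mtx_lock_slow(lock);	// tail call, no extra frame
 *	}
 *
 * where fast_path_cmpxchg_succeeds() is a hypothetical stand-in for the
 * inline uncontended-acquire attempt.
 */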

/*
 * Intel lock invariants:
 *
 * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
 *
 * The lock owner is promoted to the max priority of all its waiters only if it
 * was a lower priority when it acquired or was an owner when a waiter waited.
 * Max priority is capped at MAXPRI_PROMOTE.
 *
 * The last waiter will not be promoted as it is woken up, but the last
 * lock owner may not have been the last thread to have been woken up depending on the
 * luck of the draw. Therefore a last-owner may still have the promoted-on-wakeup
 * flag set.
 *
 * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
 * priority from dropping priority in the future without having to take thread lock
 * on acquire.
 */

/*
 * Routine:	lck_mtx_alloc_init
 */
lck_mtx_t *
lck_mtx_alloc_init(
	lck_grp_t  *grp,
	lck_attr_t *attr)
{
	lck_mtx_t *lck;

	lck = zalloc(KT_LCK_MTX);
	lck_mtx_init(lck, grp, attr);
	return lck;
}

/*
 * Routine:	lck_mtx_free
 */
void
lck_mtx_free(
	lck_mtx_t *lck,
	lck_grp_t *grp)
{
	lck_mtx_destroy(lck, grp);
	zfree(KT_LCK_MTX, lck);
}
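
/*
 * Typical lifecycle of a dynamically allocated mutex (illustrative
 * sketch; `my_grp` stands for an assumed caller-provided lock group):
 *
 *	lck_mtx_t *m = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);
 *	lck_mtx_lock(m);
 *	... critical section, may block ...
 *	lck_mtx_unlock(m);
 *	lck_mtx_free(m, my_grp);
 */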

/*
 * Routine:	lck_mtx_ext_init
 */
static void
lck_mtx_ext_init(
	lck_mtx_ext_t *lck,
	lck_grp_t     *grp,
	lck_attr_t    *attr)
{
	bzero((void *)lck, sizeof(lck_mtx_ext_t));

	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck->lck_mtx_deb.type = MUTEX_TAG;
		lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
	}

	lck->lck_mtx_grp = grp;

	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
		lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
	}

	lck->lck_mtx.lck_mtx_is_ext = 1;
	lck->lck_mtx.lck_mtx_pad32 = 0xFFFFFFFF;
}

/*
 * Routine:	lck_mtx_init
 */
void
lck_mtx_init(
	lck_mtx_t  *lck,
	lck_grp_t  *grp,
	lck_attr_t *attr)
{
	lck_mtx_ext_t *lck_ext;
	lck_attr_t    *lck_attr;

	if (attr != LCK_ATTR_NULL) {
		lck_attr = attr;
	} else {
		lck_attr = &LockDefaultLckAttr;
	}

	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck_ext = zalloc(KT_LCK_MTX_EXT);
		lck_mtx_ext_init(lck_ext, grp, lck_attr);
		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
		lck->lck_mtx_ptr = lck_ext;
	} else {
		lck->lck_mtx_owner = 0;
		lck->lck_mtx_state = 0;
	}
	lck->lck_mtx_pad32 = 0xFFFFFFFF;
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}

/*
 * Routine:	lck_mtx_init_ext
 */
void
lck_mtx_init_ext(
	lck_mtx_t     *lck,
	lck_mtx_ext_t *lck_ext,
	lck_grp_t     *grp,
	lck_attr_t    *attr)
{
	lck_attr_t *lck_attr;

	if (attr != LCK_ATTR_NULL) {
		lck_attr = attr;
	} else {
		lck_attr = &LockDefaultLckAttr;
	}

	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck_mtx_ext_init(lck_ext, grp, lck_attr);
		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
		lck->lck_mtx_ptr = lck_ext;
	} else {
		lck->lck_mtx_owner = 0;
		lck->lck_mtx_state = 0;
	}
	lck->lck_mtx_pad32 = 0xFFFFFFFF;

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}

static void
lck_mtx_lock_mark_destroyed(
	lck_mtx_t *mutex,
	boolean_t indirect)
{
	uint32_t state;

	if (indirect) {
		/* convert to destroyed state */
		ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);
		return;
	}

	state = ordered_load_mtx_state(mutex);
	lck_mtx_interlock_lock(mutex, &state);

	ordered_store_mtx_state_release(mutex, LCK_MTX_TAG_DESTROYED);

	enable_preemption();
}

/*
 * Routine:	lck_mtx_destroy
 */
void
lck_mtx_destroy(
	lck_mtx_t *lck,
	lck_grp_t *grp)
{
	boolean_t indirect;

	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
		return;
	}
#if MACH_LDEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
#endif
	indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);

	lck_mtx_lock_mark_destroyed(lck, indirect);

	if (indirect) {
		zfree(KT_LCK_MTX_EXT, lck->lck_mtx_ptr);
	}
	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
	lck_grp_deallocate(grp);
	return;
}


#if DEVELOPMENT || DEBUG
__attribute__((noinline))
void
lck_mtx_owner_check_panic(
	lck_mtx_t *lock)
{
	thread_t owner = (thread_t)lock->lck_mtx_owner;
	panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
}
#endif

__attribute__((always_inline))
static boolean_t
get_indirect_mutex(
	lck_mtx_t **lock,
	uint32_t  *state)
{
	*lock = &((*lock)->lck_mtx_ptr->lck_mtx);
	*state = ordered_load_mtx_state(*lock);
	return TRUE;
}

/*
 * Routine:	lck_mtx_unlock_slow
 *
 * Unlocks a mutex held by current thread.
 *
 * It will wake up waiters if necessary.
 *
 * Interlock can be held.
 */
__attribute__((noinline))
void
lck_mtx_unlock_slow(
	lck_mtx_t *lock)
{
	thread_t  thread;
	uint32_t  state, prev;
	boolean_t indirect = FALSE;

	state = ordered_load_mtx_state(lock);

	/* Is this an indirect mutex? */
	if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
		indirect = get_indirect_mutex(&lock, &state);
	}

	thread = current_thread();

#if DEVELOPMENT || DEBUG
	thread_t owner = (thread_t)lock->lck_mtx_owner;
	if (__improbable(owner != thread)) {
		lck_mtx_owner_check_panic(lock);
	}
#endif

	/* check if it is held as a spinlock */
	if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
		goto unlock;
	}

	lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);

unlock:
	/* preemption disabled, interlock held and mutex not held */

	/* clear owner */
	ordered_store_mtx_owner(lock, 0);
	/* keep original state in prev for later evaluation */
	prev = state;

	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
#if MACH_LDEBUG
		if (thread) {
			thread->mutex_count--;
		}
#endif
		return lck_mtx_unlock_wakeup_tail(lock, state, indirect);
	}

	/* release interlock, promotion and clear spin flag */
	state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
	ordered_store_mtx_state_release(lock, state); /* since I own the interlock, I don't need an atomic update */

#if MACH_LDEBUG
	/* perform lock statistics after drop to prevent delay */
	if (thread) {
		thread->mutex_count--; /* lock statistic */
	}
#endif /* MACH_LDEBUG */

	/* re-enable preemption */
	lck_mtx_unlock_finish_inline(lock, FALSE);

	return;
}

#define LCK_MTX_LCK_WAIT_CODE           0x20
#define LCK_MTX_LCK_WAKEUP_CODE         0x21
#define LCK_MTX_LCK_SPIN_CODE           0x22
#define LCK_MTX_LCK_ACQUIRE_CODE        0x23
#define LCK_MTX_LCK_DEMOTE_CODE         0x24

/*
 * Routine:	lck_mtx_unlock_wakeup_tail
 *
 * Invoked on unlock when there is
 * contention, i.e. the assembly routine sees
 * that mutex->lck_mtx_waiters != 0
 *
 * Neither the mutex nor the interlock is held.
 *
 * Note that this routine might not be called if there are pending
 * waiters which have previously been woken up, and they didn't
 * end up boosting the old owner.
 *
 * The assembly routine previously did the following to the mutex:
 * (after saving the state in prior_lock_state)
 *      decremented lck_mtx_waiters if nonzero
 *
 * This function needs to be called as a tail call
 * to optimize the compiled code.
 */
__attribute__((noinline))
static void
lck_mtx_unlock_wakeup_tail(
	lck_mtx_t *mutex,
	uint32_t  state,
	boolean_t indirect)
{
	struct turnstile *ts;

	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
	kern_return_t did_wake;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	if (mutex->lck_mtx_waiters > 1) {
		/* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the woken-up thread */
		did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
	} else {
		did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
		turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
	}
	assert(did_wake == KERN_SUCCESS);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);

	state -= LCK_MTX_WAITER;
	state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
	ordered_store_mtx_state_release(mutex, state);

	assert(current_thread()->turnstile != NULL);

	turnstile_cleanup();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	lck_mtx_unlock_finish_inline(mutex, indirect);
}

/*
 * Routine:	lck_mtx_lock_acquire_x86
 *
 * Invoked on acquiring the mutex when there is
 * contention (i.e. the assembly routine sees that
 * mutex->lck_mtx_waiters != 0).
 *
1347 * mutex is owned... interlock is held... preemption is disabled
1348 */
1349 __attribute__((always_inline))
1350 static void
lck_mtx_lock_acquire_inline(lck_mtx_t * mutex,struct turnstile * ts)1351 lck_mtx_lock_acquire_inline(
1352 lck_mtx_t *mutex,
1353 struct turnstile *ts)
1354 {
1355 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1356
1357 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
1358 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1359
1360 thread_t thread = (thread_t)mutex->lck_mtx_owner; /* faster than current_thread() */
1361
1362 if (mutex->lck_mtx_waiters > 0) {
1363 if (ts == NULL) {
1364 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1365 }
1366
1367 turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1368 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1369 }
1370
1371 if (ts != NULL) {
1372 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1373 }
1374
1375 assert(current_thread()->turnstile != NULL);
1376
1377 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
1378 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1379 }
1380
1381 void
lck_mtx_lock_acquire_x86(lck_mtx_t * mutex)1382 lck_mtx_lock_acquire_x86(
1383 lck_mtx_t *mutex)
1384 {
1385 return lck_mtx_lock_acquire_inline(mutex, NULL);
1386 }
1387
1388 /*
1389 * Tail call helpers for lock functions that perform
1390 * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
1391 * the caller's compiled code.
1392 */
1393
1394 __attribute__((noinline))
1395 static void
lck_mtx_lock_acquire_tail(lck_mtx_t * mutex,boolean_t indirect,struct turnstile * ts)1396 lck_mtx_lock_acquire_tail(
1397 lck_mtx_t *mutex,
1398 boolean_t indirect,
1399 struct turnstile *ts)
1400 {
1401 lck_mtx_lock_acquire_inline(mutex, ts);
1402 lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex), indirect);
1403 }
1404
1405 __attribute__((noinline))
1406 static boolean_t
lck_mtx_try_lock_acquire_tail(lck_mtx_t * mutex)1407 lck_mtx_try_lock_acquire_tail(
1408 lck_mtx_t *mutex)
1409 {
1410 lck_mtx_lock_acquire_inline(mutex, NULL);
1411 lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));
1412
1413 return TRUE;
1414 }
1415
1416 __attribute__((noinline))
1417 static void
lck_mtx_convert_spin_acquire_tail(lck_mtx_t * mutex)1418 lck_mtx_convert_spin_acquire_tail(
1419 lck_mtx_t *mutex)
1420 {
1421 lck_mtx_lock_acquire_inline(mutex, NULL);
1422 lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
1423 }
1424
1425 boolean_t
lck_mtx_ilk_unlock(lck_mtx_t * mutex)1426 lck_mtx_ilk_unlock(
1427 lck_mtx_t *mutex)
1428 {
1429 lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
1430 return TRUE;
1431 }
1432
1433 static inline void
lck_mtx_interlock_lock_set_and_clear_flags(lck_mtx_t * mutex,uint32_t xor_flags,uint32_t and_flags,uint32_t * new_state)1434 lck_mtx_interlock_lock_set_and_clear_flags(
1435 lck_mtx_t *mutex,
1436 uint32_t xor_flags,
1437 uint32_t and_flags,
1438 uint32_t *new_state)
1439 {
1440 uint32_t state, prev;
1441 state = *new_state;
1442
1443 for (;;) {
1444 /* have to wait for interlock to clear */
1445 while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
1446 cpu_pause();
1447 state = ordered_load_mtx_state(mutex);
1448 }
1449 prev = state; /* prev contains snapshot for exchange */
1450 state |= LCK_MTX_ILOCKED_MSK | xor_flags; /* pick up interlock */
1451 state &= ~and_flags; /* clear flags */
1452
1453 disable_preemption();
1454 if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
1455 break;
1456 }
1457 enable_preemption();
1458 cpu_pause();
1459 state = ordered_load_mtx_state(mutex);
1460 }
1461 *new_state = state;
1462 return;
1463 }
1464
1465 static inline void
lck_mtx_interlock_lock_clear_flags(lck_mtx_t * mutex,uint32_t and_flags,uint32_t * new_state)1466 lck_mtx_interlock_lock_clear_flags(
1467 lck_mtx_t *mutex,
1468 uint32_t and_flags,
1469 uint32_t *new_state)
1470 {
1471 return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
1472 }
1473
1474 static inline void
lck_mtx_interlock_lock(lck_mtx_t * mutex,uint32_t * new_state)1475 lck_mtx_interlock_lock(
1476 lck_mtx_t *mutex,
1477 uint32_t *new_state)
1478 {
1479 return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
1480 }
1481
1482 static inline int
lck_mtx_interlock_try_lock_set_flags(lck_mtx_t * mutex,uint32_t or_flags,uint32_t * new_state)1483 lck_mtx_interlock_try_lock_set_flags(
1484 lck_mtx_t *mutex,
1485 uint32_t or_flags,
1486 uint32_t *new_state)
1487 {
1488 uint32_t state, prev;
1489 state = *new_state;
1490
1491 /* have to wait for interlock to clear */
1492 if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
1493 return 0;
1494 }
1495 prev = state; /* prev contains snapshot for exchange */
1496 state |= LCK_MTX_ILOCKED_MSK | or_flags; /* pick up interlock */
1497 disable_preemption();
1498 if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
1499 *new_state = state;
1500 return 1;
1501 }
1502
1503 enable_preemption();
1504 return 0;
1505 }
1506
1507 __attribute__((noinline))
1508 static void
lck_mtx_lock_contended(lck_mtx_t * lock,boolean_t indirect,boolean_t * first_miss)1509 lck_mtx_lock_contended(
1510 lck_mtx_t *lock,
1511 boolean_t indirect,
1512 boolean_t *first_miss)
1513 {
1514 lck_mtx_spinwait_ret_type_t ret;
1515 uint32_t state;
1516 thread_t thread;
1517 struct turnstile *ts = NULL;
1518
1519 try_again:
1520
1521 if (indirect) {
1522 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, first_miss);
1523 }
1524
1525 ret = lck_mtx_lock_spinwait_x86(lock);
1526 state = ordered_load_mtx_state(lock);
1527 switch (ret) {
1528 case LCK_MTX_SPINWAIT_NO_SPIN:
1529 /*
1530 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
1531 * try to spin.
1532 */
1533 if (indirect) {
1534 lck_grp_mtx_update_direct_wait((struct _lck_mtx_ext_*)lock);
1535 }
1536
1537 /* just fall through case LCK_MTX_SPINWAIT_SPUN */
1538 OS_FALLTHROUGH;
1539 case LCK_MTX_SPINWAIT_SPUN_HIGH_THR:
1540 case LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE:
1541 case LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION:
1542 case LCK_MTX_SPINWAIT_SPUN_SLIDING_THR:
1543 /*
1544 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
1545 * interlock not held
1546 */
1547 lck_mtx_interlock_lock(lock, &state);
1548 assert(state & LCK_MTX_ILOCKED_MSK);
1549
1550 if (state & LCK_MTX_MLOCKED_MSK) {
1551 if (indirect) {
1552 lck_grp_mtx_update_wait((struct _lck_mtx_ext_*)lock, first_miss);
1553 }
1554 lck_mtx_lock_wait_x86(lock, &ts);
1555 /*
1556 * interlock is not held here.
1557 */
1558 goto try_again;
1559 } else {
1560 /* grab the mutex */
1561 state |= LCK_MTX_MLOCKED_MSK;
1562 ordered_store_mtx_state_release(lock, state);
1563 thread = current_thread();
1564 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1565 #if MACH_LDEBUG
1566 if (thread) {
1567 thread->mutex_count++;
1568 }
1569 #endif /* MACH_LDEBUG */
1570 }
1571
1572 break;
1573 case LCK_MTX_SPINWAIT_ACQUIRED:
1574 /*
1575 * mutex has been acquired by lck_mtx_lock_spinwait_x86
1576 * interlock is held and preemption disabled
1577 * owner is set and mutex marked as locked
1578 * statistics updated too
1579 */
1580 break;
1581 default:
1582 panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p", ret, lock);
1583 }
1584
1585 /*
1586 * interlock is already acquired here
1587 */
1588
1589 /* mutex has been acquired */
1590 thread = (thread_t)lock->lck_mtx_owner;
1591 if (state & LCK_MTX_WAITERS_MSK) {
1592 /*
1593 * lck_mtx_lock_acquire_tail will call
1594 * turnstile_complete.
1595 */
1596 return lck_mtx_lock_acquire_tail(lock, indirect, ts);
1597 }
1598
1599 if (ts != NULL) {
1600 turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1601 }
1602
1603 assert(current_thread()->turnstile != NULL);
1604
1605 /* release the interlock */
1606 lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock), indirect);
1607 }
1608
1609 /*
1610 * Helper noinline functions for calling
1611 * panic to optimize compiled code.
1612 */
1613
1614 __attribute__((noinline)) __abortlike
1615 static void
lck_mtx_destroyed(lck_mtx_t * lock)1616 lck_mtx_destroyed(
1617 lck_mtx_t *lock)
1618 {
1619 panic("trying to interlock destroyed mutex (%p)", lock);
1620 }
1621
1622 __attribute__((noinline))
1623 static boolean_t
lck_mtx_try_destroyed(lck_mtx_t * lock)1624 lck_mtx_try_destroyed(
1625 lck_mtx_t *lock)
1626 {
1627 panic("trying to interlock destroyed mutex (%p)", lock);
1628 return FALSE;
1629 }
1630
1631 __attribute__((always_inline))
1632 static boolean_t
lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t * lock,uint32_t * new_state)1633 lck_mtx_lock_wait_interlock_to_clear(
1634 lck_mtx_t *lock,
1635 uint32_t* new_state)
1636 {
1637 uint32_t state;
1638
1639 for (;;) {
1640 cpu_pause();
1641 state = ordered_load_mtx_state(lock);
1642 if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1643 *new_state = state;
1644 return TRUE;
1645 }
1646 if (state & LCK_MTX_MLOCKED_MSK) {
1647 /* if it is held as mutex, just fail */
1648 return FALSE;
1649 }
1650 }
1651 }
1652
1653 __attribute__((always_inline))
1654 static boolean_t
lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t * lock,uint32_t * new_state)1655 lck_mtx_try_lock_wait_interlock_to_clear(
1656 lck_mtx_t *lock,
1657 uint32_t* new_state)
1658 {
1659 uint32_t state;
1660
1661 for (;;) {
1662 cpu_pause();
1663 state = ordered_load_mtx_state(lock);
1664 if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
1665 /* if it is held as mutex or spin, just fail */
1666 return FALSE;
1667 }
1668 if (!(state & LCK_MTX_ILOCKED_MSK)) {
1669 *new_state = state;
1670 return TRUE;
1671 }
1672 }
1673 }
1674
1675 /*
1676 * Routine: lck_mtx_lock_slow
1677 *
1678 * Locks a mutex for current thread.
1679 * If the lock is contended this function might
1680 * sleep.
1681 *
1682 * Called with interlock not held.
1683 */
1684 __attribute__((noinline))
1685 void
lck_mtx_lock_slow(lck_mtx_t * lock)1686 lck_mtx_lock_slow(
1687 lck_mtx_t *lock)
1688 {
1689 boolean_t indirect = FALSE;
1690 uint32_t state;
1691 int first_miss = 0;
1692
1693 state = ordered_load_mtx_state(lock);
1694
1695 /* is the interlock or mutex held */
1696 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
1697 /*
1698 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
1699 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
1700 * set in state (state == lck_mtx_tag)
1701 */
1702
1703
1704 /* is the mutex already held and not indirect */
1705 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1706 /* no, must have been the mutex */
1707 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1708 }
1709
1710 /* check to see if it is marked destroyed */
1711 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1712 lck_mtx_destroyed(lock);
1713 }
1714
1715 /* Is this an indirect mutex? */
1716 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1717 indirect = get_indirect_mutex(&lock, &state);
1718
1719 first_miss = 0;
1720 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
1721
1722 if (state & LCK_MTX_SPIN_MSK) {
1723 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1724 assert(state & LCK_MTX_ILOCKED_MSK);
1725 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1726 }
1727 }
1728
1729 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1730 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1731 }
1732 }
1733
1734 /* no - can't be INDIRECT, DESTROYED or locked */
1735 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1736 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1737 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1738 }
1739 }
1740
1741 /* lock and interlock acquired */
1742
1743 thread_t thread = current_thread();
1744 /* record owner of mutex */
1745 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1746
1747 #if MACH_LDEBUG
1748 if (thread) {
1749 thread->mutex_count++; /* lock statistic */
1750 }
1751 #endif
1752 /*
1753 * Check if there are waiters to
1754 * inherit their priority.
1755 */
1756 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1757 return lck_mtx_lock_acquire_tail(lock, indirect, NULL);
1758 }
1759
1760 /* release the interlock */
1761 lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock), indirect);
1762
1763 return;
1764 }
1765
1766 __attribute__((noinline))
1767 boolean_t
lck_mtx_try_lock_slow(lck_mtx_t * lock)1768 lck_mtx_try_lock_slow(
1769 lck_mtx_t *lock)
1770 {
1771 boolean_t indirect = FALSE;
1772 uint32_t state;
1773 int first_miss = 0;
1774
1775 state = ordered_load_mtx_state(lock);
1776
1777 /* is the interlock or mutex held */
1778 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
1779 /*
1780 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
1781 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
1782 * set in state (state == lck_mtx_tag)
1783 */
1784
1785 /* is the mutex already held and not indirect */
1786 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1787 return FALSE;
1788 }
1789
1790 /* check to see if it is marked destroyed */
1791 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1792 lck_mtx_try_destroyed(lock);
1793 }
1794
1795 /* Is this an indirect mutex? */
1796 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1797 indirect = get_indirect_mutex(&lock, &state);
1798
1799 first_miss = 0;
1800 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
1801 }
1802
1803 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
1804 if (indirect) {
1805 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1806 }
1807 return FALSE;
1808 }
1809 }
1810
1811 /* no - can't be INDIRECT, DESTROYED or locked */
1812 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1813 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
1814 if (indirect) {
1815 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1816 }
1817 return FALSE;
1818 }
1819 }
1820
1821 /* lock and interlock acquired */
1822
1823 thread_t thread = current_thread();
1824 /* record owner of mutex */
1825 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1826
1827 #if MACH_LDEBUG
1828 if (thread) {
1829 thread->mutex_count++; /* lock statistic */
1830 }
1831 #endif
1832 /*
1833 * Check if there are waiters to
1834 * inherit their priority.
1835 */
1836 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1837 return lck_mtx_try_lock_acquire_tail(lock);
1838 }
1839
1840 /* release the interlock */
1841 lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
1842
1843 return TRUE;
1844 }
1845
1846 __attribute__((noinline))
1847 void
lck_mtx_lock_spin_slow(lck_mtx_t * lock)1848 lck_mtx_lock_spin_slow(
1849 lck_mtx_t *lock)
1850 {
1851 boolean_t indirect = FALSE;
1852 uint32_t state;
1853 int first_miss = 0;
1854
1855 state = ordered_load_mtx_state(lock);
1856
1857 /* is the interlock or mutex held */
1858 if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
1859 /*
1860 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
1861 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
1862 * set in state (state == lck_mtx_tag)
1863 */
1864
1865
1866 /* is the mutex already held and not indirect */
1867 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1868 /* no, must have been the mutex */
1869 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1870 }
1871
1872 /* check to see if it is marked destroyed */
1873 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1874 lck_mtx_destroyed(lock);
1875 }
1876
1877 /* Is this an indirect mutex? */
1878 if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
1879 indirect = get_indirect_mutex(&lock, &state);
1880
1881 first_miss = 0;
1882 lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
1883
1884 if (state & LCK_MTX_SPIN_MSK) {
1885 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1886 assert(state & LCK_MTX_ILOCKED_MSK);
1887 lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
1888 }
1889 }
1890
1891 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1892 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1893 }
1894 }
1895
1896 /* no - can't be INDIRECT, DESTROYED or locked */
1897 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1898 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1899 return lck_mtx_lock_contended(lock, indirect, &first_miss);
1900 }
1901 }
1902
1903 /* lock as spinlock and interlock acquired */
1904
1905 thread_t thread = current_thread();
1906 /* record owner of mutex */
1907 ordered_store_mtx_owner(lock, (uintptr_t)thread);
1908
1909 #if MACH_LDEBUG
1910 if (thread) {
1911 thread->mutex_count++; /* lock statistic */
1912 }
1913 #endif
1914
1915 #if CONFIG_DTRACE
1916 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
1917 #endif
1918 /* return with the interlock held and preemption disabled */
1919 return;
1920 }

__attribute__((noinline))
boolean_t
lck_mtx_try_lock_spin_slow(
	lck_mtx_t       *lock)
{
	boolean_t       indirect = FALSE;
	uint32_t        state;
	int             first_miss = 0;

	state = ordered_load_mtx_state(lock);

	/* is the interlock or mutex held */
	if (__improbable(state & ((LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))) {
		/*
		 * Note: both LCK_MTX_TAG_DESTROYED and LCK_MTX_TAG_INDIRECT
		 * have LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK
		 * set in state (state == lck_mtx_tag)
		 */

		/* is the mutex already held and not indirect */
		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
			return FALSE;
		}

		/* check to see if it is marked destroyed */
		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
			lck_mtx_try_destroyed(lock);
		}

		/* Is this an indirect mutex? */
		if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
			indirect = get_indirect_mutex(&lock, &state);

			first_miss = 0;
			lck_grp_mtx_update_held((struct _lck_mtx_ext_ *)lock);
		}

		if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
			if (indirect) {
				lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
			}
			return FALSE;
		}
	}

	/* no - can't be INDIRECT, DESTROYED or locked */
	while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
		if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
			if (indirect) {
				lck_grp_mtx_update_miss((struct _lck_mtx_ext_ *)lock, &first_miss);
			}
			return FALSE;
		}
	}

	/* lock and interlock acquired */

	thread_t thread = current_thread();
	/* record owner of mutex */
	ordered_store_mtx_owner(lock, (uintptr_t)thread);

#if MACH_LDEBUG
	if (thread) {
		thread->mutex_count++;          /* lock statistic */
	}
#endif

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return TRUE;
}

__attribute__((noinline))
void
lck_mtx_convert_spin(
	lck_mtx_t       *lock)
{
	uint32_t state;

	state = ordered_load_mtx_state(lock);

	/* Is this an indirect mutex? */
	if (__improbable(state == LCK_MTX_TAG_INDIRECT)) {
		/* If so, take indirection */
		get_indirect_mutex(&lock, &state);
	}

	assertf((thread_t)lock->lck_mtx_owner == current_thread(), "lock %p not owned by thread %p (current owner %p)", lock, current_thread(), (thread_t)lock->lck_mtx_owner);

	if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
		/* already owned as a mutex, just return */
		return;
	}

	assert(get_preemption_level() > 0);
	assert(state & LCK_MTX_ILOCKED_MSK);
	assert(state & LCK_MTX_SPIN_MSK);

	/*
	 * Check if there are waiters whose priority
	 * we need to inherit.
	 */
	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
		return lck_mtx_convert_spin_acquire_tail(lock);
	}

	lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));

	return;
}
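
/*
 * Illustrative sketch only (not part of this file): the spin-then-convert
 * pattern this routine supports.  `my_lock` and `must_block` are
 * hypothetical; lck_mtx_lock_spin() and lck_mtx_unlock() are the public
 * wrappers around the slow paths above.
 */
#if 0
	lck_mtx_lock_spin(&my_lock);            /* held as a spinlock           */
	if (must_block) {
		lck_mtx_convert_spin(&my_lock); /* upgrade in place to a mutex  */
		/* ... code that may now block while still holding the lock ... */
	}
	lck_mtx_unlock(&my_lock);               /* releases either form         */
#endif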

static inline boolean_t
lck_mtx_lock_grab_mutex(
	lck_mtx_t       *lock)
{
	uint32_t state;

	state = ordered_load_mtx_state(lock);

	if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
		return FALSE;
	}

	/* lock and interlock acquired */

	thread_t thread = current_thread();
	/* record owner of mutex */
	ordered_store_mtx_owner(lock, (uintptr_t)thread);

#if MACH_LDEBUG
	if (thread) {
		thread->mutex_count++;          /* lock statistic */
	}
#endif
	return TRUE;
}

__attribute__((noinline))
void
lck_mtx_assert(
	lck_mtx_t       *lock,
	unsigned int    type)
{
	thread_t thread, owner;
	uint32_t state;

	thread = current_thread();
	state = ordered_load_mtx_state(lock);

	if (state == LCK_MTX_TAG_INDIRECT) {
		get_indirect_mutex(&lock, &state);
	}

	owner = (thread_t)lock->lck_mtx_owner;

	if (type == LCK_MTX_ASSERT_OWNED) {
		if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
			panic("mutex (%p) not owned", lock);
		}
	} else {
		assert(type == LCK_MTX_ASSERT_NOTOWNED);
		if (owner == thread) {
			panic("mutex (%p) owned", lock);
		}
	}
}
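
/*
 * Illustrative sketch only (not part of this file): a routine that must
 * run with its mutex held can document and enforce that via the
 * LCK_MTX_ASSERT() wrapper around this function.  `my_obj_t` and its
 * `lock` field are hypothetical.
 */
#if 0
	static void
	my_update(my_obj_t *obj)
	{
		LCK_MTX_ASSERT(&obj->lock, LCK_MTX_ASSERT_OWNED);
		/* ... mutate state protected by obj->lock ... */
	}
#endif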

/*
 * Routine:	lck_mtx_lock_spinwait_x86
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor. We spin for up to a maximum
 * time waiting for the lock to be released.
 *
 * Called with the interlock unlocked.
 * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
 * returns LCK_MTX_SPINWAIT_SPUN if we spun
 * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
 */
__attribute__((noinline))
lck_mtx_spinwait_ret_type_t
lck_mtx_lock_spinwait_x86(
	lck_mtx_t       *mutex)
{
	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
	thread_t        owner, prev_owner;
	uint64_t        window_deadline, sliding_deadline, high_deadline;
	uint64_t        start_time, cur_time, avg_hold_time, bias, delta;
	lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
	int             loopcount = 0;
	int             total_hold_time_samples, window_hold_time_samples, unfairness;
	uint            i, prev_owner_cpu;
	bool            owner_on_core, adjust;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);

	start_time = mach_absolute_time();
	/*
	 * window_deadline represents the "learning" phase.
	 * The thread collects statistics about the lock while the
	 * window is open, then decides whether to keep spinning or
	 * to block according to the concurrency behavior observed.
	 *
	 * Every thread can spin at least low_MutexSpin.
	 */
	window_deadline = start_time + low_MutexSpin;
	/*
	 * sliding_deadline is the adjusted spin deadline
	 * computed after the "learning" phase.
	 */
	sliding_deadline = window_deadline;
	/*
	 * high_deadline is a hard deadline. No thread
	 * can spin past this deadline.
	 */
	if (high_MutexSpin >= 0) {
		high_deadline = start_time + high_MutexSpin;
	} else {
		high_deadline = start_time + low_MutexSpin * real_ncpus;
	}
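
	/*
	 * Worked example with illustrative numbers only: if low_MutexSpin
	 * were 60,000 ticks, real_ncpus 8, and high_MutexSpin negative, a
	 * spin starting at time t would get window_deadline = t + 60,000
	 * and high_deadline = t + 480,000; sliding_deadline starts equal
	 * to window_deadline and is re-derived each time the learning
	 * window expires (see below).
	 */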

	/*
	 * We do not yet know which cpu the owner is running on.
	 * Initialize prev_owner_cpu with the next cpu.
	 */
	prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
	total_hold_time_samples = 0;
	window_hold_time_samples = 0;
	avg_hold_time = 0;
	adjust = TRUE;
	bias = (os_hash_kernel_pointer(mutex) + cpu_number()) % real_ncpus;

	prev_owner = (thread_t) mutex->lck_mtx_owner;
	/*
	 * Spin while:
	 *   - mutex is locked, and
	 *   - it's locked as a spin lock, and
	 *   - owner is running on another processor, and
	 *   - we haven't spun for long enough.
	 */
	do {
		/*
		 * Try to acquire the lock.
		 */
		if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
			retval = LCK_MTX_SPINWAIT_ACQUIRED;
			break;
		}

		cur_time = mach_absolute_time();

		/*
		 * Never spin past high_deadline.
		 */
		if (cur_time >= high_deadline) {
			retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
			break;
		}

		/*
		 * Check if the owner is on core. If not, block.
		 */
		owner = (thread_t) mutex->lck_mtx_owner;
		if (owner) {
			i = prev_owner_cpu;
			owner_on_core = FALSE;

			disable_preemption();
			owner = (thread_t) mutex->lck_mtx_owner;

			/*
			 * For scalability we want to check if the owner is on core
			 * without locking the mutex interlock.
			 * If we do not lock the mutex interlock, the owner that we see might be
			 * invalid, so we cannot dereference it. Therefore we cannot check
			 * any field of the thread to tell us if it is on core.
			 * Check if the thread that is running on the other cpus matches the owner.
			 */
			if (owner) {
				do {
					if ((cpu_data_ptr[i] != NULL) && (cpu_data_ptr[i]->cpu_active_thread == owner)) {
						owner_on_core = TRUE;
						break;
					}
					if (++i >= real_ncpus) {
						i = 0;
					}
				} while (i != prev_owner_cpu);
				enable_preemption();

				if (owner_on_core) {
					prev_owner_cpu = i;
				} else {
					prev_owner = owner;
					owner = (thread_t) mutex->lck_mtx_owner;
					if (owner == prev_owner) {
						/*
						 * Owner is not on core.
						 * Stop spinning.
						 */
						if (loopcount == 0) {
							retval = LCK_MTX_SPINWAIT_NO_SPIN;
						} else {
							retval = LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE;
						}
						break;
					}
					/*
					 * Fall through if the owner changed while we were scanning.
					 * The new owner could potentially be on core, so loop
					 * again.
					 */
				}
			} else {
				enable_preemption();
			}
		}

		/*
		 * Save how many times we see the owner changing.
		 * We can roughly estimate the mutex hold
		 * time and the fairness with that.
		 */
		if (owner != prev_owner) {
			prev_owner = owner;
			total_hold_time_samples++;
			window_hold_time_samples++;
		}

		/*
		 * Learning window expired.
		 * Try to adjust the sliding_deadline.
		 */
		if (cur_time >= window_deadline) {
			/*
			 * If there was no contention during the window,
			 * stop spinning.
			 */
			if (window_hold_time_samples < 1) {
				retval = LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION;
				break;
			}

			if (adjust) {
				/*
				 * For a fair lock, we'd wait for at most (NCPU-1) periods,
				 * but the lock is unfair, so let's try to estimate by how much.
				 */
				unfairness = total_hold_time_samples / real_ncpus;

				if (unfairness == 0) {
					/*
					 * We observed the owner changing `total_hold_time_samples` times which
					 * let us estimate the average hold time of this mutex for the duration
					 * of the spin time.
					 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
					 *
					 * In this case spin at max avg_hold_time * (real_ncpus - 1)
					 */
					delta = cur_time - start_time;
					sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
				} else {
					/*
					 * In this case at least one of the other cpus was able to get the lock twice
					 * while I was spinning.
					 * We could spin longer but it won't necessarily help if the system is unfair.
					 * Try to randomize the wait to reduce contention.
					 *
					 * We compute how much time we could potentially spin
					 * and distribute it over the cpus.
					 *
					 * bias is an integer between 0 and real_ncpus - 1.
					 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
					 */
					delta = high_deadline - cur_time;
					sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
					adjust = FALSE;
				}
			}

			window_deadline += low_MutexSpin;
			window_hold_time_samples = 0;
		}
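
		/*
		 * Worked example with illustrative numbers only: with
		 * real_ncpus == 4, if the window expires delta ticks after
		 * start_time with total_hold_time_samples == 2, then
		 * unfairness == 0 and sliding_deadline = start_time +
		 * (delta * 3) / 2, i.e. room for about (real_ncpus - 1)
		 * average hold times.  If instead total_hold_time_samples
		 * == 9, unfairness == 2 and the remaining budget
		 * (high_deadline - cur_time) is scaled by bias / real_ncpus,
		 * staggering when the spinning cpus give up.
		 */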

		/*
		 * Stop spinning if we are past
		 * the adjusted deadline.
		 */
		if (cur_time >= sliding_deadline) {
			retval = LCK_MTX_SPINWAIT_SPUN_SLIDING_THR;
			break;
		}

		if ((thread_t) mutex->lck_mtx_owner != NULL) {
			cpu_pause();
		}

		loopcount++;
	} while (TRUE);

#if CONFIG_DTRACE
	/*
	 * Note that we record a different probe id depending on whether
	 * this is a direct or indirect mutex. This allows us to
	 * penalize only lock groups that have debug/stats enabled
	 * with dtrace processing if desired.
	 */
	if (__probable(mutex->lck_mtx_is_ext == 0)) {
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
		    mach_absolute_time() - start_time);
	} else {
		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
		    mach_absolute_time() - start_time);
	}
	/* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);

	return retval;
}
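
/*
 * Illustrative sketch only (not part of this file): the shape of the
 * caller's decision on the value returned above.  The real caller is the
 * contended-lock path; this condensed switch is hypothetical.
 */
#if 0
	switch (lck_mtx_lock_spinwait_x86(mutex)) {
	case LCK_MTX_SPINWAIT_ACQUIRED:
		/* we own the mutex; fix up owner/waiter state and return */
		break;
	case LCK_MTX_SPINWAIT_NO_SPIN:
	default:
		/* spinning did not pay off: take the interlock and block
		 * via lck_mtx_lock_wait_x86() below */
		break;
	}
#endif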

/*
 * Routine:	lck_mtx_lock_wait_x86
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * preemption disabled...
 * returns it unlocked and with preemption enabled
 *
 * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
 *      A runnable waiter can exist between wait and acquire
 *      without a waiters count being set.
 *      This allows us to never make a spurious wakeup call.
 *
 * Priority:
 *      This avoids taking the thread lock if the owning thread is the same priority.
 *      This optimizes the case of same-priority threads contending on a lock.
 *      However, that allows the owning thread to drop in priority while holding the lock,
 *      because there is no state that the priority change can notice that
 *      says that the targeted thread holds a contended mutex.
 *
 *      One possible solution: priority changes could look for some atomic tag
 *      on the thread saying 'holding contended lock', and then set up a promotion.
 *      Needs a story for dropping that promotion - the last contended unlock
 *      has to notice that this has happened.
 */
__attribute__((noinline))
void
lck_mtx_lock_wait_x86(
	lck_mtx_t       *mutex,
	struct turnstile **ts)
{
	thread_t self = current_thread();

#if CONFIG_DTRACE
	uint64_t sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif
	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
	    mutex->lck_mtx_waiters, 0, 0);

	mutex->lck_mtx_waiters++;

	thread_t holder = (thread_t)mutex->lck_mtx_owner;
	assert(holder != NULL);

	/*
	 * lck_mtx_lock_wait_x86 may be called in a loop. Call turnstile_prepare
	 * just once and reuse the same turnstile while looping; the matching
	 * turnstile_complete will be called by lck_mtx_lock_contended
	 * when the lock is finally acquired.
	 */
	if (*ts == NULL) {
		*ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	}

	struct turnstile *turnstile = *ts;
	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	lck_mtx_ilk_unlock(mutex);

	turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
	    mutex->lck_mtx_waiters, 0, 0);

#if CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (mutex->lck_mtx_is_ext == 0) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}
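
/*
 * Illustrative sketch only (not part of this file): the expected shape of
 * the contended-path loop that drives the wait routine above.  The retry
 * helper name is hypothetical; the turnstile_prepare()/turnstile_complete()
 * pairing follows the comment inside lck_mtx_lock_wait_x86().
 */
#if 0
	struct turnstile *ts = NULL;

	for (;;) {
		/* interlock held here */
		if (try_to_grab_mutex(mutex)) {         /* hypothetical helper */
			break;                          /* acquired            */
		}
		lck_mtx_lock_wait_x86(mutex, &ts);      /* drops interlock, blocks */
		/* woken up: re-take the interlock and retry */
	}
	if (ts != NULL) {
		turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
	}
#endif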

/*
 * Routine:	kdp_lck_mtx_lock_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 * Returns: TRUE if lock is acquired.
 */
boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
	if (not_in_kdp) {
		panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
	}

	if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
		return TRUE;
	}

	return FALSE;
}

void
kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
	thread_t holder = (thread_t)mutex->lck_mtx_owner;
	waitinfo->owner = thread_tid(holder);
}