1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/lock.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * Date: 1985
60 *
61 * Locking primitives implementation
62 */
63
64 #define LOCK_PRIVATE 1
65
66 #include <mach_ldebug.h>
67
68 #include <kern/lock_stat.h>
69 #include <kern/locks.h>
70 #include <kern/zalloc.h>
71 #include <kern/misc_protos.h>
72 #include <kern/thread.h>
73 #include <kern/processor.h>
74 #include <kern/cpu_data.h>
75 #include <kern/cpu_number.h>
76 #include <kern/sched_prim.h>
77 #include <kern/debug.h>
78 #include <string.h>
79
80 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
81 #include <i386/tsc.h>
82 #include <machine/atomic.h>
83 #include <machine/machine_cpu.h>
84 #include <i386/mp.h>
85 #include <machine/atomic.h>
86 #include <sys/kdebug.h>
87 #if LCK_MTX_USE_ARCH
88 #include <i386/locks_i386_inlines.h>
89 #endif /* LCK_MTX_USE_ARCH */
90 #include <kern/cpu_number.h>
91 #include <os/hash.h>
92 #include <i386/cpuid.h>
93
#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

/* TSC-tick budget before a spinning acquire panics (~251e6 ticks). */
uint64_t _Atomic lock_panic_timeout = 0xf000000; /* 251e6 TSC ticks */

/* Forwards */

#if USLOCK_DEBUG
/*
 * Perform simple lock checks.
 */
int     uslock_check = 1;
int     max_lock_loops = 100000000;
decl_simple_lock_data(extern, printf_lock);
decl_simple_lock_data(extern, panic_lock);
#endif  /* USLOCK_DEBUG */

/* Nonzero when NOT running inside the kernel debugger (see kdp checks). */
extern unsigned int not_in_kdp;

/*
 * When lock-group tracking is compiled out, strip the grp argument from
 * the static nopreempt helpers so call sites can pass it unconditionally.
 */
#if !LCK_GRP_USE_ARG
#define usimple_lock_nopreempt(lck, grp) \
	usimple_lock_nopreempt(lck)
#define usimple_lock_try_nopreempt(lck, grp) \
	usimple_lock_try_nopreempt(lck)
#endif
static void usimple_lock_nopreempt(usimple_lock_t, lck_grp_t *);
static unsigned int usimple_lock_try_nopreempt(usimple_lock_t, lck_grp_t *);

/*
 * We often want to know the addresses of the callers
 * of the various lock routines. However, this information
 * is only used for debugging and statistics.
 */
typedef void    *pc_t;
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
#if ANY_LOCK_DEBUG
#define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
#define DECL_PC(pc)     pc_t pc;
#else   /* ANY_LOCK_DEBUG */
#define DECL_PC(pc)
#ifdef  lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc)   ++pc
#else   /* lint */
#define OBTAIN_PC(pc)
#endif  /* lint */
#endif  /* ANY_LOCK_DEBUG */

/* kalloc type zones backing lck_spin_alloc_init / lck_mtx_alloc_init. */
KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);

#if LCK_MTX_USE_ARCH
KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);
#endif /* LCK_MTX_USE_ARCH */
149
150 /*
151 * atomic exchange API is a low level abstraction of the operations
152 * to atomically read, modify, and write a pointer. This abstraction works
153 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
154 * well as the ARM exclusive instructions.
155 *
156 * atomic_exchange_begin() - begin exchange and retrieve current value
157 * atomic_exchange_complete() - conclude an exchange
158 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
159 */
160 uint32_t
atomic_exchange_begin32(uint32_t * target,uint32_t * previous,enum memory_order ord)161 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
162 {
163 uint32_t val;
164
165 (void)ord; // Memory order not used
166 val = os_atomic_load(target, relaxed);
167 *previous = val;
168 return val;
169 }
170
171 boolean_t
atomic_exchange_complete32(uint32_t * target,uint32_t previous,uint32_t newval,enum memory_order ord)172 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
173 {
174 return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
175 }
176
/*
 * Abort an exchange started with atomic_exchange_begin32().
 *
 * On this architecture the "begin" step is a plain load with no
 * reservation to release (unlike ARM load-exclusive), so there is
 * nothing to undo.
 */
void
atomic_exchange_abort(void)
{
}
181
182 boolean_t
atomic_test_and_set32(uint32_t * target,uint32_t test_mask,uint32_t set_mask,enum memory_order ord,boolean_t wait)183 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
184 {
185 uint32_t value, prev;
186
187 for (;;) {
188 value = atomic_exchange_begin32(target, &prev, ord);
189 if (value & test_mask) {
190 if (wait) {
191 cpu_pause();
192 } else {
193 atomic_exchange_abort();
194 }
195 return FALSE;
196 }
197 value |= set_mask;
198 if (atomic_exchange_complete32(target, prev, value, ord)) {
199 return TRUE;
200 }
201 }
202 }
203
/*
 * Portable lock package implementation of usimple_locks.
 */

#if USLOCK_DEBUG
/* USLDBG(stmt) compiles to stmt only on debug builds. */
#define USLDBG(stmt)    stmt
void            usld_lock_init(usimple_lock_t, unsigned short);
void            usld_lock_pre(usimple_lock_t, pc_t);
void            usld_lock_post(usimple_lock_t, pc_t);
void            usld_unlock(usimple_lock_t, pc_t);
void            usld_lock_try_pre(usimple_lock_t, pc_t);
void            usld_lock_try_post(usimple_lock_t, pc_t);
int             usld_lock_common_checks(usimple_lock_t, char *);
#else   /* USLOCK_DEBUG */
#define USLDBG(stmt)
#endif  /* USLOCK_DEBUG */

#if LCK_MTX_USE_ARCH
/*
 * Forward definitions for the arch-specific lck_mtx slow paths.
 */

static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state);
static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
#endif /* LCK_MTX_USE_ARCH */
233
234
235 /*
236 * Routine: lck_spin_alloc_init
237 */
238 lck_spin_t *
lck_spin_alloc_init(lck_grp_t * grp,lck_attr_t * attr)239 lck_spin_alloc_init(
240 lck_grp_t *grp,
241 lck_attr_t *attr)
242 {
243 lck_spin_t *lck;
244
245 lck = zalloc(KT_LCK_SPIN);
246 lck_spin_init(lck, grp, attr);
247 return lck;
248 }
249
250 /*
251 * Routine: lck_spin_free
252 */
253 void
lck_spin_free(lck_spin_t * lck,lck_grp_t * grp)254 lck_spin_free(
255 lck_spin_t *lck,
256 lck_grp_t *grp)
257 {
258 lck_spin_destroy(lck, grp);
259 zfree(KT_LCK_SPIN, lck);
260 }
261
262 /*
263 * Routine: lck_spin_init
264 */
265 void
lck_spin_init(lck_spin_t * lck,lck_grp_t * grp,__unused lck_attr_t * attr)266 lck_spin_init(
267 lck_spin_t *lck,
268 lck_grp_t *grp,
269 __unused lck_attr_t *attr)
270 {
271 usimple_lock_init((usimple_lock_t) lck, 0);
272 if (grp) {
273 lck_grp_reference(grp, &grp->lck_grp_spincnt);
274 }
275 }
276
277 /*
278 * Routine: lck_spin_destroy
279 */
280 void
lck_spin_destroy(lck_spin_t * lck,lck_grp_t * grp)281 lck_spin_destroy(
282 lck_spin_t *lck,
283 lck_grp_t *grp)
284 {
285 if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
286 return;
287 }
288 lck->interlock = LCK_SPIN_TAG_DESTROYED;
289 if (grp) {
290 lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
291 }
292 return;
293 }
294
295 /*
296 * Routine: lck_spin_lock
297 */
298 void
lck_spin_lock_grp(lck_spin_t * lck,lck_grp_t * grp)299 lck_spin_lock_grp(
300 lck_spin_t *lck,
301 lck_grp_t *grp)
302 {
303 #pragma unused(grp)
304 usimple_lock((usimple_lock_t) lck, grp);
305 }
306
307 void
lck_spin_lock(lck_spin_t * lck)308 lck_spin_lock(
309 lck_spin_t *lck)
310 {
311 usimple_lock((usimple_lock_t) lck, NULL);
312 }
313
314 void
lck_spin_lock_nopreempt(lck_spin_t * lck)315 lck_spin_lock_nopreempt(
316 lck_spin_t *lck)
317 {
318 usimple_lock_nopreempt((usimple_lock_t) lck, NULL);
319 }
320
321 void
lck_spin_lock_nopreempt_grp(lck_spin_t * lck,lck_grp_t * grp)322 lck_spin_lock_nopreempt_grp(
323 lck_spin_t *lck,
324 lck_grp_t *grp)
325 {
326 #pragma unused(grp)
327 usimple_lock_nopreempt((usimple_lock_t) lck, grp);
328 }
329
330 /*
331 * Routine: lck_spin_unlock
332 */
333 void
lck_spin_unlock(lck_spin_t * lck)334 lck_spin_unlock(
335 lck_spin_t *lck)
336 {
337 usimple_unlock((usimple_lock_t) lck);
338 }
339
340 void
lck_spin_unlock_nopreempt(lck_spin_t * lck)341 lck_spin_unlock_nopreempt(
342 lck_spin_t *lck)
343 {
344 usimple_unlock_nopreempt((usimple_lock_t) lck);
345 }
346
347 boolean_t
lck_spin_try_lock_grp(lck_spin_t * lck,lck_grp_t * grp)348 lck_spin_try_lock_grp(
349 lck_spin_t *lck,
350 lck_grp_t *grp)
351 {
352 #pragma unused(grp)
353 boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
354 #if DEVELOPMENT || DEBUG
355 if (lrval) {
356 pltrace(FALSE);
357 }
358 #endif
359 return lrval;
360 }
361
362
363 /*
364 * Routine: lck_spin_try_lock
365 */
366 boolean_t
lck_spin_try_lock(lck_spin_t * lck)367 lck_spin_try_lock(
368 lck_spin_t *lck)
369 {
370 boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
371 #if DEVELOPMENT || DEBUG
372 if (lrval) {
373 pltrace(FALSE);
374 }
375 #endif
376 return lrval;
377 }
378
379 int
lck_spin_try_lock_nopreempt(lck_spin_t * lck)380 lck_spin_try_lock_nopreempt(
381 lck_spin_t *lck)
382 {
383 boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, LCK_GRP_NULL);
384 #if DEVELOPMENT || DEBUG
385 if (lrval) {
386 pltrace(FALSE);
387 }
388 #endif
389 return lrval;
390 }
391
392 int
lck_spin_try_lock_nopreempt_grp(lck_spin_t * lck,lck_grp_t * grp)393 lck_spin_try_lock_nopreempt_grp(
394 lck_spin_t *lck,
395 lck_grp_t *grp)
396 {
397 #pragma unused(grp)
398 boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, grp);
399 #if DEVELOPMENT || DEBUG
400 if (lrval) {
401 pltrace(FALSE);
402 }
403 #endif
404 return lrval;
405 }
406
407 /*
408 * Routine: lck_spin_assert
409 */
410 void
lck_spin_assert(lck_spin_t * lock,unsigned int type)411 lck_spin_assert(lck_spin_t *lock, unsigned int type)
412 {
413 thread_t thread, holder;
414 uintptr_t state;
415
416 if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
417 panic("lck_spin_assert(): invalid arg (%u)", type);
418 }
419
420 state = lock->interlock;
421 holder = (thread_t)state;
422 thread = current_thread();
423 if (type == LCK_ASSERT_OWNED) {
424 if (__improbable(holder == THREAD_NULL)) {
425 panic("Lock not owned %p = %lx", lock, state);
426 }
427 if (__improbable(holder != thread)) {
428 panic("Lock not owned by current thread %p = %lx", lock, state);
429 }
430 } else if (type == LCK_ASSERT_NOTOWNED) {
431 if (__improbable(holder != THREAD_NULL)) {
432 if (holder == thread) {
433 panic("Lock owned by current thread %p = %lx", lock, state);
434 }
435 }
436 }
437 }
438
439 /*
440 * Routine: kdp_lck_spin_is_acquired
441 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
442 * Returns: TRUE if lock is acquired.
443 */
444 boolean_t
kdp_lck_spin_is_acquired(lck_spin_t * lck)445 kdp_lck_spin_is_acquired(lck_spin_t *lck)
446 {
447 if (not_in_kdp) {
448 panic("panic: spinlock acquired check done outside of kernel debugger");
449 }
450 return (lck->interlock != 0)? TRUE : FALSE;
451 }
452
453 /*
454 * Initialize a usimple_lock.
455 *
456 * No change in preemption state.
457 */
458 void
usimple_lock_init(usimple_lock_t l,__unused unsigned short tag)459 usimple_lock_init(
460 usimple_lock_t l,
461 __unused unsigned short tag)
462 {
463 USLDBG(usld_lock_init(l, tag));
464 hw_lock_init(&l->interlock);
465 }
466
/*
 * Timeout callback for the usimple_lock spin policy: invoked when a CPU
 * has spun past the policy's TSC budget trying to take the interlock.
 * Keeps spinning (HW_LOCK_TIMEOUT_CONTINUE) when timeouts are suspended
 * — e.g. while stopped in a debugger — otherwise gathers diagnostics
 * and panics.  Does not return in the panic case.
 */
static hw_spin_timeout_status_t
usimple_lock_acquire_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	usimple_lock_t l = _lock;
	uintptr_t lowner;
	lck_spinlock_to_info_t lsti;

	if (machine_timeout_suspended()) {
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	/* raw lock word; the diagnostics below treat it as the owner thread */
	lowner = (uintptr_t)l->interlock.lock_data;
	lsti = lck_spinlock_timeout_hit(l, lowner);

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT " ;"
	    "lock owner thread=0x%lx, current_thread: %p, "
	    "lock owner active on CPU %d, "
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    l, HW_SPIN_TIMEOUT_ARG(to, st),
	    lowner, current_thread(), lsti->owner_cpu,
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
489
/*
 * Spin policy used by all usimple_lock acquires: spin up to
 * LockTimeOutTSC ticks, then call usimple_lock_acquire_timeout_panic.
 */
static const struct hw_spin_policy usimple_lock_spin_policy = {
	.hwsp_name = "usimple_lock_t",
	.hwsp_timeout = &LockTimeOutTSC,
	.hwsp_op_timeout = usimple_lock_acquire_timeout_panic,
};
495
/*
 * Acquire a usimple_lock.
 *
 * Returns with preemption disabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
/*
 * The parentheses around the function name suppress expansion of any
 * function-like macro of the same name, so this defines the real
 * out-of-line implementation behind the simple_lock() macros.
 */
(usimple_lock)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));

	/* spin with timeout policy; panics if held too long */
	(void)hw_lock_to(&l->interlock, &usimple_lock_spin_policy, grp);
#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
523
/*
 * Acquire a usimple_lock_nopreempt
 *
 * Called and returns with preemption disabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
static void
usimple_lock_nopreempt(
	usimple_lock_t l,
	lck_grp_t *grp)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));

	/* same spin/timeout policy as usimple_lock, minus preemption toggling */
	(void)hw_lock_to_nopreempt(&l->interlock, &usimple_lock_spin_policy, grp);

#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
552
553
/*
 * Release a usimple_lock.
 *
 * Returns with preemption enabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
usimple_unlock(
	usimple_lock_t l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	/* debug ownership checks run BEFORE the hardware lock is dropped */
	USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	hw_lock_unlock(&l->interlock);
}
574
/*
 * Release a usimple_unlock_nopreempt.
 *
 * Called and returns with preemption disabled.  Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
usimple_unlock_nopreempt(
	usimple_lock_t l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	/* debug ownership checks run BEFORE the hardware lock is dropped */
	USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	hw_lock_unlock_nopreempt(&l->interlock);
}
595
596 /*
597 * Conditionally acquire a usimple_lock.
598 *
599 * On success, returns with preemption disabled.
600 * On failure, returns with preemption in the same state
601 * as when first invoked. Note that the hw_lock routines
602 * are responsible for maintaining preemption state.
603 *
604 * XXX No stats are gathered on a miss; I preserved this
605 * behavior from the original assembly-language code, but
606 * doesn't it make sense to log misses? XXX
607 */
608 unsigned int
usimple_lock_try(usimple_lock_t l,lck_grp_t * grp)609 usimple_lock_try(
610 usimple_lock_t l,
611 lck_grp_t *grp)
612 {
613 unsigned int success;
614 DECL_PC(pc);
615
616 OBTAIN_PC(pc);
617 USLDBG(usld_lock_try_pre(l, pc));
618 if ((success = hw_lock_try(&l->interlock, grp))) {
619 #if DEVELOPMENT || DEBUG
620 pltrace(FALSE);
621 #endif
622 USLDBG(usld_lock_try_post(l, pc));
623 }
624 return success;
625 }
626
627 void
usimple_lock_assert(usimple_lock_t l,unsigned int type)628 usimple_lock_assert(usimple_lock_t l, unsigned int type)
629 {
630 hw_lock_assert(&l->interlock, type);
631 }
632
633 /*
634 * Conditionally acquire a usimple_lock.
635 *
636 * Called and returns with preemption disabled. Note
637 * that the hw_lock routines are responsible for
638 * maintaining preemption state.
639 *
640 * XXX No stats are gathered on a miss; I preserved this
641 * behavior from the original assembly-language code, but
642 * doesn't it make sense to log misses? XXX
643 */
644 static unsigned int
usimple_lock_try_nopreempt(usimple_lock_t l,lck_grp_t * grp)645 usimple_lock_try_nopreempt(
646 usimple_lock_t l,
647 lck_grp_t *grp)
648 {
649 unsigned int success;
650 DECL_PC(pc);
651
652 OBTAIN_PC(pc);
653 USLDBG(usld_lock_try_pre(l, pc));
654 if ((success = hw_lock_try_nopreempt(&l->interlock, grp))) {
655 #if DEVELOPMENT || DEBUG
656 pltrace(FALSE);
657 #endif
658 USLDBG(usld_lock_try_post(l, pc));
659 }
660 return success;
661 }
662
/*
 * Acquire a usimple_lock while polling for pending cpu signals
 * and spinning on a lock.
 *
 * Returns 1 if the lock was acquired before `deadline` (absolute mach
 * time), 0 if the deadline passed first.  When entered with interrupts
 * disabled, pending cross-CPU signals are drained on every failed
 * attempt so this spinning CPU cannot deadlock against the CPU that is
 * signaling it.
 */
unsigned
int
(usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
    uint64_t deadline
    LCK_GRP_ARG(lck_grp_t *grp))
{
	/* interrupt state is sampled once; assumed stable for the spin */
	boolean_t istate = ml_get_interrupts_enabled();

	if (deadline < mach_absolute_time()) {
		return 0;
	}

	while (!simple_lock_try(l, grp)) {
		if (!istate) {
			/* interrupts are off: service pending CPU signals by hand */
			cpu_signal_handler(NULL);
		}

		if (deadline < mach_absolute_time()) {
			return 0;
		}

		cpu_pause();
	}

	return 1;
}
694
/*
 * Acquire a usimple_lock, spinning indefinitely (saturated deadline)
 * while staying safe against pending cross-CPU signals.
 */
void
(usimple_lock_try_lock_loop)(usimple_lock_t l
    LCK_GRP_ARG(lck_grp_t *grp))
{
	/* When the lock is not contended, grab the lock and go. */
	if (!simple_lock_try(l, grp)) {
		usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
	}
}
704
/*
 * Duration-based variant of the signal-safe spin acquire: `duration`
 * is a relative timeout in nanoseconds, converted to an absolute
 * deadline (saturating on overflow).  Returns 1 on acquisition,
 * 0 on timeout.
 */
unsigned
int
(usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
    uint64_t duration
    LCK_GRP_ARG(lck_grp_t *grp))
{
	uint64_t deadline;
	uint64_t base_at;
	uint64_t duration_at;

	/* Fast track for uncontended locks */
	if (simple_lock_try(l, grp)) {
		return 1;
	}

	base_at = mach_absolute_time();

	nanoseconds_to_absolutetime(duration, &duration_at);
	deadline = base_at + duration_at;
	if (deadline < base_at) {
		/* deadline has overflowed, make it saturate */
		deadline = ULLONG_MAX;
	}

	return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
}
731
732 #if USLOCK_DEBUG
733 /*
734 * States of a usimple_lock. The default when initializing
735 * a usimple_lock is setting it up for debug checking.
736 */
737 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
738 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
739 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
740 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
741 #define USLOCK_CHECKING(l) (uslock_check && \
742 ((l)->debug.state & USLOCK_CHECKED))
743
744 /*
745 * Initialize the debugging information contained
746 * in a usimple_lock.
747 */
748 void
usld_lock_init(usimple_lock_t l,__unused unsigned short tag)749 usld_lock_init(
750 usimple_lock_t l,
751 __unused unsigned short tag)
752 {
753 if (l == USIMPLE_LOCK_NULL) {
754 panic("lock initialization: null lock pointer");
755 }
756 l->lock_type = USLOCK_TAG;
757 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
758 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
759 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
760 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
761 l->debug.duration[0] = l->debug.duration[1] = 0;
762 l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
763 l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
764 l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
765 }
766
767
/*
 * These checks apply to all usimple_locks, not just
 * those with USLOCK_CHECKED turned on.
 *
 * Panics when `l` is null, is not tagged as a usimple lock, or was
 * never initialized.  Returns nonzero when per-lock debug checking
 * is enabled (USLOCK_CHECKING), so callers can skip further checks.
 */
int
usld_lock_common_checks(
	usimple_lock_t l,
	char *caller)
{
	if (l == USIMPLE_LOCK_NULL) {
		panic("%s: null lock pointer", caller);
	}
	if (l->lock_type != USLOCK_TAG) {
		panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
	}
	if (!(l->debug.state & USLOCK_INIT)) {
		panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
	}
	return USLOCK_CHECKING(l);
}
788
789
/*
 * Debug checks on a usimple_lock just before attempting
 * to acquire it.
 *
 * Panics if the current thread already appears to hold the lock
 * (an imminent self-deadlock).
 */
/* ARGSUSED */
void
usld_lock_pre(
	usimple_lock_t l,
	pc_t pc)
{
	char caller[] = "usimple_lock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	/*
	 * Note that we have a weird case where we are getting a lock when we are
	 * in the process of putting the system to sleep. We are running with no
	 * current threads, therefore we can't tell if we are trying to retake a lock
	 * we have or someone on the other processor has it. Therefore we just
	 * ignore this test if the locking thread is 0.
	 */

	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s: lock %p already locked (at %p) by",
		    caller, l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		    l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	/*
	 * NOTE(review): this disable/enable pair has no lasting effect —
	 * looks like a leftover preemption-state probe; confirm intent.
	 */
	mp_disable_preemption();
	mp_enable_preemption();
}
826
827
828 /*
829 * Debug checks on a usimple_lock just after acquiring it.
830 *
831 * Pre-emption has been disabled at this point,
832 * so we are safe in using cpu_number.
833 */
834 void
usld_lock_post(usimple_lock_t l,pc_t pc)835 usld_lock_post(
836 usimple_lock_t l,
837 pc_t pc)
838 {
839 unsigned int mycpu;
840 char caller[] = "successful usimple_lock";
841
842
843 if (!usld_lock_common_checks(l, caller)) {
844 return;
845 }
846
847 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
848 panic("%s: lock %p became uninitialized",
849 caller, l);
850 }
851 if ((l->debug.state & USLOCK_TAKEN)) {
852 panic("%s: lock 0x%p became TAKEN by someone else",
853 caller, l);
854 }
855
856 mycpu = (unsigned int)cpu_number();
857 assert(mycpu <= UCHAR_MAX);
858
859 l->debug.lock_thread = (void *)current_thread();
860 l->debug.state |= USLOCK_TAKEN;
861 l->debug.lock_pc = pc;
862 l->debug.lock_cpu = (unsigned char)mycpu;
863 }
864
865
866 /*
867 * Debug checks on a usimple_lock just before
868 * releasing it. Note that the caller has not
869 * yet released the hardware lock.
870 *
871 * Preemption is still disabled, so there's
872 * no problem using cpu_number.
873 */
874 void
usld_unlock(usimple_lock_t l,pc_t pc)875 usld_unlock(
876 usimple_lock_t l,
877 pc_t pc)
878 {
879 unsigned int mycpu;
880 char caller[] = "usimple_unlock";
881
882
883 if (!usld_lock_common_checks(l, caller)) {
884 return;
885 }
886
887 mycpu = cpu_number();
888 assert(mycpu <= UCHAR_MAX);
889
890 if (!(l->debug.state & USLOCK_TAKEN)) {
891 panic("%s: lock 0x%p hasn't been taken",
892 caller, l);
893 }
894 if (l->debug.lock_thread != (void *) current_thread()) {
895 panic("%s: unlocking lock 0x%p, owned by thread %p",
896 caller, l, l->debug.lock_thread);
897 }
898 if (l->debug.lock_cpu != mycpu) {
899 printf("%s: unlocking lock 0x%p on cpu 0x%x",
900 caller, l, mycpu);
901 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
902 panic("%s", caller);
903 }
904
905 l->debug.unlock_thread = l->debug.lock_thread;
906 l->debug.lock_thread = INVALID_PC;
907 l->debug.state &= ~USLOCK_TAKEN;
908 l->debug.unlock_pc = pc;
909 l->debug.unlock_cpu = (unsigned char)mycpu;
910 }
911
912
913 /*
914 * Debug checks on a usimple_lock just before
915 * attempting to acquire it.
916 *
917 * Preemption isn't guaranteed to be disabled.
918 */
919 void
usld_lock_try_pre(usimple_lock_t l,__unused pc_t pc)920 usld_lock_try_pre(
921 usimple_lock_t l,
922 __unused pc_t pc)
923 {
924 char caller[] = "usimple_lock_try";
925
926 if (!usld_lock_common_checks(l, caller)) {
927 return;
928 }
929 }
930
931
932 /*
933 * Debug checks on a usimple_lock just after
934 * successfully attempting to acquire it.
935 *
936 * Preemption has been disabled by the
937 * lock acquisition attempt, so it's safe
938 * to use cpu_number.
939 */
940 void
usld_lock_try_post(usimple_lock_t l,pc_t pc)941 usld_lock_try_post(
942 usimple_lock_t l,
943 pc_t pc)
944 {
945 unsigned int mycpu;
946 char caller[] = "successful usimple_lock_try";
947
948 if (!usld_lock_common_checks(l, caller)) {
949 return;
950 }
951
952 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
953 panic("%s: lock 0x%p became uninitialized",
954 caller, l);
955 }
956 if ((l->debug.state & USLOCK_TAKEN)) {
957 panic("%s: lock 0x%p became TAKEN by someone else",
958 caller, l);
959 }
960
961 mycpu = cpu_number();
962 assert(mycpu <= UCHAR_MAX);
963
964 l->debug.lock_thread = (void *) current_thread();
965 l->debug.state |= USLOCK_TAKEN;
966 l->debug.lock_pc = pc;
967 l->debug.lock_cpu = (unsigned char)mycpu;
968 }
969 #endif /* USLOCK_DEBUG */
970 #if LCK_MTX_USE_ARCH
971
972 /*
973 * Slow path routines for lck_mtx locking and unlocking functions.
974 *
975 * These functions were previously implemented in x86 assembly,
976 * and some optimizations are in place in this c code to obtain a compiled code
977 * as performant and compact as the assembly version.
978 *
979 * To avoid to inline these functions on the fast path, all functions directly called by
980 * the fast paths have the __attribute__((noinline)) specified. Also they are all implemented
981 * in such a way the fast path can tail call into them. In this way the return address
982 * does not need to be pushed on the caller stack and stack optimization can happen on the caller.
983 *
984 * Slow path code is structured in such a way there are no calls to functions that will return
985 * on the context of the caller function, i.e. all functions called are or tail call functions
986 * or inline functions. The number of arguments of the tail call functions are less then six,
987 * so that they can be passed over registers and do not need to be pushed on stack.
988 * This allows the compiler to not create a stack frame for the functions.
989 *
990 * __improbable and __probable are used to compile the slow path code in such a way
991 * the fast path case will be on a sequence of instructions with as less jumps as possible,
992 * to make this case the most optimized even if falling through the slow path.
993 */
994
995 /*
996 * Intel lock invariants:
997 *
998 * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
999 *
1000 * The lock owner is promoted to the max priority of all its waiters only if it
1001 * was a lower priority when it acquired or was an owner when a waiter waited.
1002 * Max priority is capped at MAXPRI_PROMOTE.
1003 *
1004 * The last waiter will not be promoted as it is woken up, but the last
1005 * lock owner may not have been the last thread to have been woken up depending on the
1006 * luck of the draw. Therefore a last-owner may still have the promoted-on-wakeup
1007 * flag set.
1008 *
1009 * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
1010 * priority from dropping priority in the future without having to take thread lock
1011 * on acquire.
1012 */
1013
1014 /*
1015 * Routine: lck_mtx_alloc_init
1016 */
1017 lck_mtx_t *
lck_mtx_alloc_init(lck_grp_t * grp,lck_attr_t * attr)1018 lck_mtx_alloc_init(
1019 lck_grp_t *grp,
1020 lck_attr_t *attr)
1021 {
1022 lck_mtx_t *lck;
1023
1024 lck = zalloc(KT_LCK_MTX);
1025 lck_mtx_init(lck, grp, attr);
1026 return lck;
1027 }
1028
1029 /*
1030 * Routine: lck_mtx_free
1031 */
1032 void
lck_mtx_free(lck_mtx_t * lck,lck_grp_t * grp)1033 lck_mtx_free(
1034 lck_mtx_t *lck,
1035 lck_grp_t *grp)
1036 {
1037 lck_mtx_destroy(lck, grp);
1038 zfree(KT_LCK_MTX, lck);
1039 }
1040
/*
 * Routine: lck_mtx_init
 *
 * Initialize a mutex in place: record its lock group, optionally
 * enable contention profiling, and take a group reference.
 */
void
lck_mtx_init(
	lck_mtx_t *lck,
	lck_grp_t *grp,
	lck_attr_t *attr)
{
	if (attr == LCK_ATTR_NULL) {
		attr = &lck_attr_default;
	}

	/* zero the whole mutex, keeping only the group id */
	*lck = (lck_mtx_t){
		.lck_mtx_grp = grp->lck_grp_attr_id,
	};

	/*
	 * NOTE(review): the second operand tests LCK_GRP_ATTR_DEBUG against
	 * lck_grp_attr_id rather than a group attribute-value field —
	 * assumes the debug flag is encoded in the id; confirm.
	 */
	if (__improbable((attr->lck_attr_val & LCK_ATTR_DEBUG) ||
	    (grp->lck_grp_attr_id & LCK_GRP_ATTR_DEBUG))) {
		lck->lck_mtx_profile = 1;
	}

	lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
}
1065
/*
 * Routine: lck_mtx_destroy
 *
 * Mark a mutex destroyed; destroying twice is a no-op.  Asserts
 * (on MACH_LDEBUG builds) that the mutex is not owned, publishes the
 * DESTROYED tag with release semantics, and drops the group reference.
 */
void
lck_mtx_destroy(
	lck_mtx_t *lck,
	lck_grp_t *grp)
{
	if (lck->lck_mtx_state == LCK_MTX_TAG_DESTROYED) {
		return;
	}
#if MACH_LDEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
#endif
	ordered_store_mtx_state_release(lck, LCK_MTX_TAG_DESTROYED);
	lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
}
1083
1084
1085 #if DEVELOPMENT | DEBUG
/*
 * Out-of-line panic helper for unlock-from-non-owner detection
 * (DEVELOPMENT/DEBUG builds only); noinline keeps the panic path
 * out of the unlock fast path.
 */
__attribute__((noinline))
void
lck_mtx_owner_check_panic(
	lck_mtx_t *lock)
{
	/* "unsafe" lookup: owner may be stale — best effort for the panic string */
	thread_t owner = ctid_get_thread_unsafe(lock->lck_mtx_owner);
	panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
}
1094 #endif
1095
/*
 * Routine: lck_mtx_unlock_slow
 *
 * Unlocks a mutex held by current thread.
 *
 * It will wake up waiters if necessary.
 *
 * Interlock can be held.
 */
__attribute__((noinline))
void
lck_mtx_unlock_slow(
	lck_mtx_t *lock)
{
	thread_t thread;
	uint32_t state;

	state = ordered_load_mtx_state(lock);

	thread = current_thread();

#if DEVELOPMENT | DEBUG
	/* only the owning thread may unlock */
	if (__improbable(lock->lck_mtx_owner != thread->ctid)) {
		lck_mtx_owner_check_panic(lock);
	}
#endif

	/* check if it is held as a spinlock */
	if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
		/* spin-held: the interlock is already ours, skip re-taking it */
		goto unlock;
	}

	/* take the interlock and drop the M-locked bit in one step */
	lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);

unlock:
	/* preemption disabled, interlock held and mutex not held */

	/* clear owner */
	ordered_store_mtx_owner(lock, 0);

	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
#if MACH_LDEBUG
		if (thread) {
			thread->mutex_count--;
		}
#endif
		/* tail call: wakes a waiter and finishes releasing the lock */
		return lck_mtx_unlock_wakeup_tail(lock, state);
	}

	/* release interlock, promotion and clear spin flag */
	state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
	ordered_store_mtx_state_release(lock, state);   /* since I own the interlock, I don't need an atomic update */

#if MACH_LDEBUG
	/* perform lock statistics after drop to prevent delay */
	if (thread) {
		thread->mutex_count--;          /* lock statistic */
	}
#endif  /* MACH_LDEBUG */

	/* re-enable preemption */
	lck_mtx_unlock_finish_inline(lock, state);
}
1159
/* kdebug subcodes for DBG_MACH_LOCKS mutex tracepoints below */
#define LCK_MTX_LCK_WAIT_CODE           0x20
#define LCK_MTX_LCK_WAKEUP_CODE         0x21
#define LCK_MTX_LCK_SPIN_CODE           0x22
#define LCK_MTX_LCK_ACQUIRE_CODE        0x23
#define LCK_MTX_LCK_DEMOTE_CODE         0x24
1165
1166 /*
1167 * Routine: lck_mtx_unlock_wakeup_tail
1168 *
1169 * Invoked on unlock when there is
1170 * contention, i.e. the assembly routine sees
1171 * that mutex->lck_mtx_waiters != 0
1172 *
1173 * neither the mutex or interlock is held
1174 *
1175 * Note that this routine might not be called if there are pending
1176 * waiters which have previously been woken up, and they didn't
1177 * end up boosting the old owner.
1178 *
1179 * assembly routine previously did the following to mutex:
1180 * (after saving the state in prior_lock_state)
1181 * decremented lck_mtx_waiters if nonzero
1182 *
1183 * This function needs to be called as a tail call
1184 * to optimize the compiled code.
1185 */
1186 __attribute__((noinline))
1187 static void
lck_mtx_unlock_wakeup_tail(lck_mtx_t * mutex,uint32_t state)1188 lck_mtx_unlock_wakeup_tail(
1189 lck_mtx_t *mutex,
1190 uint32_t state)
1191 {
1192 struct turnstile *ts;
1193
1194 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1195 kern_return_t did_wake;
1196
1197 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
1198 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1199
1200 ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
1201
1202 did_wake = waitq_wakeup64_one(&ts->ts_waitq, LCK_MTX_EVENT(mutex),
1203 THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
1204 assert(did_wake == KERN_SUCCESS);
1205
1206 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1207 turnstile_complete_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
1208
1209 state -= LCK_MTX_WAITER;
1210 state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
1211 ordered_store_mtx_state_release(mutex, state);
1212
1213 assert(current_thread()->turnstile != NULL);
1214
1215 turnstile_cleanup();
1216
1217 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
1218 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1219
1220 lck_mtx_unlock_finish_inline(mutex, state);
1221 }
1222
1223 /*
1224 * Routine: lck_mtx_lock_acquire_x86
1225 *
1226 * Invoked on acquiring the mutex when there is
1227 * contention (i.e. the assembly routine sees that
1228 * that mutex->lck_mtx_waiters != 0
1229 *
1230 * mutex is owned... interlock is held... preemption is disabled
1231 */
1232 __attribute__((always_inline))
1233 static void
lck_mtx_lock_acquire_inline(lck_mtx_t * mutex,struct turnstile * ts)1234 lck_mtx_lock_acquire_inline(
1235 lck_mtx_t *mutex,
1236 struct turnstile *ts)
1237 {
1238 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1239
1240 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
1241 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1242
1243 if (mutex->lck_mtx_waiters > 0) {
1244 if (ts == NULL) {
1245 ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
1246 }
1247
1248 turnstile_update_inheritor(ts, current_thread(),
1249 TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
1250 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1251 }
1252
1253 if (ts != NULL) {
1254 turnstile_complete_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
1255 }
1256
1257 assert(current_thread()->turnstile != NULL);
1258
1259 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
1260 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
1261 }
1262
1263 void
lck_mtx_lock_acquire_x86(lck_mtx_t * mutex)1264 lck_mtx_lock_acquire_x86(
1265 lck_mtx_t *mutex)
1266 {
1267 return lck_mtx_lock_acquire_inline(mutex, NULL);
1268 }
1269
1270 /*
1271 * Tail call helpers for lock functions that perform
1272 * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
1273 * the caller's compiled code.
1274 */
1275
1276 __attribute__((noinline))
1277 static void
lck_mtx_lock_acquire_tail(lck_mtx_t * mutex,struct turnstile * ts)1278 lck_mtx_lock_acquire_tail(
1279 lck_mtx_t *mutex,
1280 struct turnstile *ts)
1281 {
1282 lck_mtx_lock_acquire_inline(mutex, ts);
1283 lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex));
1284 }
1285
/*
 * Acquire-with-waiters tail for lck_mtx_try_lock.  Always returns TRUE:
 * the mutex is already owned by the time this runs.
 */
__attribute__((noinline))
static boolean_t
lck_mtx_try_lock_acquire_tail(
	lck_mtx_t       *mutex)
{
	lck_mtx_lock_acquire_inline(mutex, NULL);
	lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));

	return TRUE;
}
1296
/*
 * Acquire-with-waiters tail for lck_mtx_convert_spin: updates the
 * turnstile inheritor, then finishes the spin-to-mutex conversion.
 */
__attribute__((noinline))
static void
lck_mtx_convert_spin_acquire_tail(
	lck_mtx_t       *mutex)
{
	lck_mtx_lock_acquire_inline(mutex, NULL);
	lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
}
1305
1306 static void
lck_mtx_ilk_unlock(lck_mtx_t * mutex)1307 lck_mtx_ilk_unlock(
1308 lck_mtx_t *mutex)
1309 {
1310 lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
1311 }
1312
/*
 * Take the interlock, atomically setting xor_flags and clearing
 * and_flags in the same update.  Spins (with preemption enabled)
 * until the interlock and xor_flags bits are observed clear.
 *
 * On return the interlock is held, preemption is disabled, and
 * *new_state holds the value that was stored.
 *
 * NOTE(review): despite its name, xor_flags is OR-ed in, not XOR-ed.
 */
static inline void
lck_mtx_interlock_lock_set_and_clear_flags(
	lck_mtx_t       *mutex,
	uint32_t        xor_flags,
	uint32_t        and_flags,
	uint32_t        *new_state)
{
	uint32_t        state, prev;
	state = *new_state;

	for (;;) {
		/* have to wait for interlock to clear */
		while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
			cpu_pause();
			state = ordered_load_mtx_state(mutex);
		}
		prev = state;                                   /* prev contains snapshot for exchange */
		state |= LCK_MTX_ILOCKED_MSK | xor_flags;       /* pick up interlock */
		state &= ~and_flags;                            /* clear flags */

		/* preemption must be off before the interlock can be owned */
		disable_preemption();
		if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
			break;
		}
		/* lost the race: back off, reload, and try again */
		enable_preemption();
		cpu_pause();
		state = ordered_load_mtx_state(mutex);
	}
	*new_state = state;
	return;
}
1344
1345 static inline void
lck_mtx_interlock_lock_clear_flags(lck_mtx_t * mutex,uint32_t and_flags,uint32_t * new_state)1346 lck_mtx_interlock_lock_clear_flags(
1347 lck_mtx_t *mutex,
1348 uint32_t and_flags,
1349 uint32_t *new_state)
1350 {
1351 return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
1352 }
1353
1354 static inline void
lck_mtx_interlock_lock(lck_mtx_t * mutex,uint32_t * new_state)1355 lck_mtx_interlock_lock(
1356 lck_mtx_t *mutex,
1357 uint32_t *new_state)
1358 {
1359 return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
1360 }
1361
/*
 * Single attempt to take the interlock while OR-ing in or_flags.
 *
 * Returns 1 with preemption disabled and *new_state updated on
 * success.  Returns 0 (preemption unchanged) if the interlock or any
 * bit of or_flags is already set, or if the cmpxchg loses a race.
 */
static inline int
lck_mtx_interlock_try_lock_set_flags(
	lck_mtx_t       *mutex,
	uint32_t        or_flags,
	uint32_t        *new_state)
{
	uint32_t        state, prev;
	state = *new_state;

	/* have to wait for interlock to clear */
	if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
		return 0;
	}
	prev = state;                                   /* prev contains snapshot for exchange */
	state |= LCK_MTX_ILOCKED_MSK | or_flags;        /* pick up interlock */
	disable_preemption();
	if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
		*new_state = state;
		return 1;
	}

	enable_preemption();
	return 0;
}
1386
/*
 * Routine:	lck_mtx_lock_contended
 *
 * Slow path for a contended mutex: adaptively spins on the owner,
 * then blocks on the lock's turnstile, looping until the mutex is
 * acquired.  Called with the interlock not held; returns with the
 * mutex held and the interlock released.
 */
__attribute__((noinline))
static void
lck_mtx_lock_contended(
	lck_mtx_t       *lock,
	bool            profile,
	boolean_t       *first_miss)
{
	lck_mtx_spinwait_ret_type_t ret;
	uint32_t state;
	thread_t thread;
	struct turnstile *ts = NULL;

try_again:

	if (profile) {
		LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, first_miss);
	}

	/* adaptive spin: may acquire the lock outright */
	ret = lck_mtx_lock_spinwait_x86(lock);
	state = ordered_load_mtx_state(lock);
	switch (ret) {
	case LCK_MTX_SPINWAIT_NO_SPIN:
		/*
		 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
		 * try to spin.
		 */
		/* just fall through case LCK_MTX_SPINWAIT_SPUN */
		OS_FALLTHROUGH;
	case LCK_MTX_SPINWAIT_SPUN_HIGH_THR:
	case LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE:
	case LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION:
	case LCK_MTX_SPINWAIT_SPUN_SLIDING_THR:
		/*
		 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
		 * interlock not held
		 */
		lck_mtx_interlock_lock(lock, &state);
		assert(state & LCK_MTX_ILOCKED_MSK);

		if (state & LCK_MTX_MLOCKED_MSK) {
			if (profile) {
				LCK_MTX_PROF_WAIT(lock, lock->lck_mtx_grp,
				    ret == LCK_MTX_SPINWAIT_NO_SPIN, first_miss);
			}
			/* block on the turnstile; ts is reused across loop iterations */
			lck_mtx_lock_wait_x86(lock, &ts);
			/*
			 * interlock is not held here.
			 */
			goto try_again;
		} else {
			/* grab the mutex */
			state |= LCK_MTX_MLOCKED_MSK;
			ordered_store_mtx_state_release(lock, state);
			thread = current_thread();
			ordered_store_mtx_owner(lock, thread->ctid);
#if MACH_LDEBUG
			if (thread) {
				thread->mutex_count++;
			}
#endif /* MACH_LDEBUG */
		}

		break;
	case LCK_MTX_SPINWAIT_ACQUIRED:
		/*
		 * mutex has been acquired by lck_mtx_lock_spinwait_x86
		 * interlock is held and preemption disabled
		 * owner is set and mutex marked as locked
		 * statistics updated too
		 */
		break;
	default:
		panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p", ret, lock);
	}

	/*
	 * interlock is already acquired here
	 */

	/* mutex has been acquired */
	if (state & LCK_MTX_WAITERS_MSK) {
		/*
		 * lck_mtx_lock_acquire_tail will call
		 * turnstile_complete.
		 */
		return lck_mtx_lock_acquire_tail(lock, ts);
	}

	if (ts != NULL) {
		turnstile_complete_hash((uintptr_t)lock, TURNSTILE_KERNEL_MUTEX);
	}

	assert(current_thread()->turnstile != NULL);

	/* release the interlock */
	lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock));
}
1484
1485 /*
1486 * Helper noinline functions for calling
1487 * panic to optimize compiled code.
1488 */
1489
1490 __attribute__((noinline)) __abortlike
1491 static void
lck_mtx_destroyed(lck_mtx_t * lock)1492 lck_mtx_destroyed(
1493 lck_mtx_t *lock)
1494 {
1495 panic("trying to interlock destroyed mutex (%p)", lock);
1496 }
1497
/*
 * try-lock flavor of lck_mtx_destroyed.  The return statement only
 * satisfies the boolean_t signature — panic() does not return.
 */
__attribute__((noinline))
static boolean_t
lck_mtx_try_destroyed(
	lck_mtx_t       *lock)
{
	panic("trying to interlock destroyed mutex (%p)", lock);
	return FALSE;
}
1506
1507 __attribute__((always_inline))
1508 static boolean_t
lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t * lock,uint32_t * new_state)1509 lck_mtx_lock_wait_interlock_to_clear(
1510 lck_mtx_t *lock,
1511 uint32_t* new_state)
1512 {
1513 uint32_t state;
1514
1515 for (;;) {
1516 cpu_pause();
1517 state = ordered_load_mtx_state(lock);
1518 if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1519 *new_state = state;
1520 return TRUE;
1521 }
1522 if (state & LCK_MTX_MLOCKED_MSK) {
1523 /* if it is held as mutex, just fail */
1524 return FALSE;
1525 }
1526 }
1527 }
1528
1529 __attribute__((always_inline))
1530 static boolean_t
lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t * lock,uint32_t * new_state)1531 lck_mtx_try_lock_wait_interlock_to_clear(
1532 lck_mtx_t *lock,
1533 uint32_t* new_state)
1534 {
1535 uint32_t state;
1536
1537 for (;;) {
1538 cpu_pause();
1539 state = ordered_load_mtx_state(lock);
1540 if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
1541 /* if it is held as mutex or spin, just fail */
1542 return FALSE;
1543 }
1544 if (!(state & LCK_MTX_ILOCKED_MSK)) {
1545 *new_state = state;
1546 return TRUE;
1547 }
1548 }
1549 }
1550
1551 /*
1552 * Routine: lck_mtx_lock_slow
1553 *
1554 * Locks a mutex for current thread.
1555 * If the lock is contended this function might
1556 * sleep.
1557 *
1558 * Called with interlock not held.
1559 */
1560 __attribute__((noinline))
1561 void
lck_mtx_lock_slow(lck_mtx_t * lock)1562 lck_mtx_lock_slow(
1563 lck_mtx_t *lock)
1564 {
1565 bool profile;
1566 uint32_t state;
1567 int first_miss = 0;
1568
1569 state = ordered_load_mtx_state(lock);
1570 profile = (state & LCK_MTX_PROFILE_MSK);
1571
1572 if (__improbable(profile)) {
1573 if (state & LCK_MTX_SPIN_MSK) {
1574 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1575 assert(state & LCK_MTX_ILOCKED_MSK);
1576 LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1577 }
1578 }
1579
1580 /* is the interlock or mutex held */
1581 if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1582 /*
1583 * Note: LCK_MTX_TAG_DESTROYED has
1584 * LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1585 */
1586
1587 /* is the mutex already held */
1588 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1589 /* no, must have been the mutex */
1590 return lck_mtx_lock_contended(lock, profile, &first_miss);
1591 }
1592
1593 /* check to see if it is marked destroyed */
1594 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1595 lck_mtx_destroyed(lock);
1596 }
1597 }
1598
1599 /* no - can't be DESTROYED or locked */
1600 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1601 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1602 return lck_mtx_lock_contended(lock, profile, &first_miss);
1603 }
1604 }
1605
1606 /* lock and interlock acquired */
1607
1608 /* record owner of mutex */
1609 ordered_store_mtx_owner(lock, current_thread()->ctid);
1610
1611 #if MACH_LDEBUG
1612 if (thread) {
1613 thread->mutex_count++; /* lock statistic */
1614 }
1615 #endif
1616 /*
1617 * Check if there are waiters to
1618 * inherit their priority.
1619 */
1620 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1621 return lck_mtx_lock_acquire_tail(lock, NULL);
1622 }
1623
1624 /* release the interlock */
1625 lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock));
1626 }
1627
1628 __attribute__((noinline))
1629 boolean_t
lck_mtx_try_lock_slow(lck_mtx_t * lock)1630 lck_mtx_try_lock_slow(
1631 lck_mtx_t *lock)
1632 {
1633 bool profile;
1634 uint32_t state;
1635 int first_miss = 0;
1636
1637 state = ordered_load_mtx_state(lock);
1638 profile = (state & LCK_MTX_PROFILE_MSK);
1639
1640 /* is the interlock or mutex held */
1641 if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1642 /*
1643 * Note: LCK_MTX_TAG_DESTROYED has
1644 * LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1645 */
1646
1647 /* is the mutex already held */
1648 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1649 return FALSE;
1650 }
1651
1652 /* check to see if it is marked destroyed */
1653 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1654 lck_mtx_try_destroyed(lock);
1655 }
1656
1657 if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
1658 if (__improbable(profile)) {
1659 LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1660 }
1661 return FALSE;
1662 }
1663 }
1664
1665 if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1666 if (__improbable(profile)) {
1667 LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1668 }
1669 return FALSE;
1670 }
1671
1672 /* lock and interlock acquired */
1673
1674 /* record owner of mutex */
1675 ordered_store_mtx_owner(lock, current_thread()->ctid);
1676
1677 #if MACH_LDEBUG
1678 if (thread) {
1679 thread->mutex_count++; /* lock statistic */
1680 }
1681 #endif
1682 /*
1683 * Check if there are waiters to
1684 * inherit their priority.
1685 */
1686 if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1687 return lck_mtx_try_lock_acquire_tail(lock);
1688 }
1689
1690 /* release the interlock */
1691 lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
1692
1693 return TRUE;
1694 }
1695
1696 __attribute__((noinline))
1697 void
lck_mtx_lock_spin_slow(lck_mtx_t * lock)1698 lck_mtx_lock_spin_slow(
1699 lck_mtx_t *lock)
1700 {
1701 bool profile;
1702 uint32_t state;
1703 int first_miss = 0;
1704
1705 state = ordered_load_mtx_state(lock);
1706 profile = (state & LCK_MTX_PROFILE_MSK);
1707
1708 if (__improbable(profile)) {
1709 if (state & LCK_MTX_SPIN_MSK) {
1710 /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1711 assert(state & LCK_MTX_ILOCKED_MSK);
1712 LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1713 }
1714 }
1715
1716 /* is interlock or mutex held */
1717 if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1718 /*
1719 * Note: LCK_MTX_TAG_DESTROYED has
1720 * LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1721 */
1722
1723 /* is the mutex already held */
1724 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1725 /* no, must have been the mutex */
1726 return lck_mtx_lock_contended(lock, profile, &first_miss);
1727 }
1728
1729 /* check to see if it is marked destroyed */
1730 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1731 lck_mtx_destroyed(lock);
1732 }
1733 }
1734
1735 bool acquired = true;
1736
1737 #if CONFIG_DTRACE
1738 uint64_t spin_start;
1739
1740 if (__probable(lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1741 goto past_spin;
1742 }
1743
1744 spin_start = LCK_MTX_SPIN_SPIN_BEGIN();
1745 #endif /* CONFIG_DTRACE */
1746
1747 /* note - can't be DESTROYED or locked */
1748 /* note - preemption is not disabled while spinning */
1749 while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1750 if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1751 acquired = false;
1752 break;
1753 }
1754 }
1755
1756 LCK_MTX_SPIN_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
1757 #if CONFIG_DTRACE
1758 past_spin:
1759 #endif /* CONFIG_DTRACE */
1760
1761 if (__improbable(!acquired)) {
1762 return lck_mtx_lock_contended(lock, profile, &first_miss);
1763 }
1764
1765 /* lock as spinlock and interlock acquired */
1766
1767 /* record owner of mutex */
1768 ordered_store_mtx_owner(lock, current_thread()->ctid);
1769
1770 #if MACH_LDEBUG
1771 if (thread) {
1772 thread->mutex_count++; /* lock statistic */
1773 }
1774 #endif
1775 LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp, true, profile);
1776
1777 /* return with the interlock held and preemption disabled */
1778 return;
1779 }
1780
1781 __attribute__((noinline))
1782 boolean_t
lck_mtx_try_lock_spin_slow(lck_mtx_t * lock)1783 lck_mtx_try_lock_spin_slow(
1784 lck_mtx_t *lock)
1785 {
1786 bool profile;
1787 uint32_t state;
1788 int first_miss = 0;
1789
1790 state = ordered_load_mtx_state(lock);
1791 profile = (state & LCK_MTX_PROFILE_MSK);
1792
1793 /* is the interlock or mutex held */
1794 if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1795 /*
1796 * Note: LCK_MTX_TAG_DESTROYED has
1797 * LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1798 */
1799
1800 /* is the mutex already held */
1801 if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1802 return FALSE;
1803 }
1804
1805 /* check to see if it is marked destroyed */
1806 if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1807 lck_mtx_try_destroyed(lock);
1808 }
1809 }
1810
1811 /* note - can't be DESTROYED or locked */
1812 if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1813 if (__improbable(profile)) {
1814 LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1815 }
1816 return FALSE;
1817 }
1818
1819 /* lock and interlock acquired */
1820
1821 /* record owner of mutex */
1822 ordered_store_mtx_owner(lock, current_thread()->ctid);
1823
1824 #if MACH_LDEBUG
1825 if (thread) {
1826 thread->mutex_count++; /* lock statistic */
1827 }
1828 #endif
1829
1830 #if CONFIG_DTRACE
1831 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_SPIN_ACQUIRE, lock, 0);
1832 #endif
1833 return TRUE;
1834 }
1835
/*
 * Routine:	lck_mtx_convert_spin
 *
 * Converts a mutex held in spin mode (interlock held, preemption
 * disabled) into a normally held mutex.  No-op if the lock is
 * already held in mutex mode.  Must be called by the owner.
 */
__attribute__((noinline))
void
lck_mtx_convert_spin(
	lck_mtx_t       *lock)
{
	uint32_t state;

	state = ordered_load_mtx_state(lock);

	assertf(lock->lck_mtx_owner == current_thread()->ctid,
	    "lock %p not owned by thread ctid %d (current owner %d)",
	    lock, current_thread()->ctid, lock->lck_mtx_owner);

	if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
		/* already owned as a mutex, just return */
		return;
	}

	/* must still be in spin mode: interlock held, preemption disabled */
	assert(get_preemption_level() > 0);
	assert(state & LCK_MTX_ILOCKED_MSK);
	assert(state & LCK_MTX_SPIN_MSK);

	/*
	 * Check if there are waiters to
	 * inherit their priority.
	 */
	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
		return lck_mtx_convert_spin_acquire_tail(lock);
	}

	lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));

	return;
}
1870
1871 static inline boolean_t
lck_mtx_lock_grab_mutex(lck_mtx_t * lock)1872 lck_mtx_lock_grab_mutex(
1873 lck_mtx_t *lock)
1874 {
1875 uint32_t state;
1876
1877 state = ordered_load_mtx_state(lock);
1878
1879 if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
1880 return FALSE;
1881 }
1882
1883 /* lock and interlock acquired */
1884
1885 /* record owner of mutex */
1886 ordered_store_mtx_owner(lock, current_thread()->ctid);
1887
1888 #if MACH_LDEBUG
1889 if (thread) {
1890 thread->mutex_count++; /* lock statistic */
1891 }
1892 #endif
1893 return TRUE;
1894 }
1895
1896 __attribute__((noinline))
1897 void
lck_mtx_assert(lck_mtx_t * lock,unsigned int type)1898 lck_mtx_assert(
1899 lck_mtx_t *lock,
1900 unsigned int type)
1901 {
1902 thread_t thread;
1903 uint32_t state;
1904
1905 thread = current_thread();
1906 state = ordered_load_mtx_state(lock);
1907
1908 if (type == LCK_MTX_ASSERT_OWNED) {
1909 if ((lock->lck_mtx_owner != thread->ctid) ||
1910 !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1911 panic("mutex (%p) not owned", lock);
1912 }
1913 } else {
1914 assert(type == LCK_MTX_ASSERT_NOTOWNED);
1915 if (lock->lck_mtx_owner == thread->ctid) {
1916 panic("mutex (%p) owned", lock);
1917 }
1918 }
1919 }
1920
1921 /*
1922 * Routine: lck_mtx_lock_spinwait_x86
1923 *
1924 * Invoked trying to acquire a mutex when there is contention but
1925 * the holder is running on another processor. We spin for up to a maximum
1926 * time waiting for the lock to be released.
1927 *
1928 * Called with the interlock unlocked.
1929 * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
1930 * returns LCK_MTX_SPINWAIT_SPUN if we spun
1931 * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
1932 */
1933 __attribute__((noinline))
1934 lck_mtx_spinwait_ret_type_t
lck_mtx_lock_spinwait_x86(lck_mtx_t * mutex)1935 lck_mtx_lock_spinwait_x86(
1936 lck_mtx_t *mutex)
1937 {
1938 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1939 ctid_t owner, prev_owner;
1940 uint64_t window_deadline, sliding_deadline, high_deadline;
1941 uint64_t spin_start, start_time, cur_time, avg_hold_time, bias, delta;
1942 lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
1943 int loopcount = 0;
1944 int total_hold_time_samples, window_hold_time_samples, unfairness;
1945 uint prev_owner_cpu;
1946 bool adjust;
1947
1948 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1949 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
1950 mutex->lck_mtx_waiters, 0, 0);
1951
1952 spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
1953 start_time = mach_absolute_time();
1954
1955 /*
1956 * window_deadline represents the "learning" phase.
1957 * The thread collects statistics about the lock during
1958 * window_deadline and then it makes a decision on whether to spin more
1959 * or block according to the concurrency behavior
1960 * observed.
1961 *
1962 * Every thread can spin at least low_MutexSpin.
1963 */
1964 window_deadline = start_time + low_MutexSpin;
1965 /*
1966 * Sliding_deadline is the adjusted spin deadline
1967 * computed after the "learning" phase.
1968 */
1969 sliding_deadline = window_deadline;
1970 /*
1971 * High_deadline is a hard deadline. No thread
1972 * can spin more than this deadline.
1973 */
1974 if (high_MutexSpin >= 0) {
1975 high_deadline = start_time + high_MutexSpin;
1976 } else {
1977 high_deadline = start_time + low_MutexSpin * real_ncpus;
1978 }
1979
1980 /*
1981 * Do not know yet which is the owner cpu.
1982 * Initialize prev_owner_cpu with next cpu.
1983 */
1984 prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
1985 total_hold_time_samples = 0;
1986 window_hold_time_samples = 0;
1987 avg_hold_time = 0;
1988 adjust = TRUE;
1989 bias = (os_hash_kernel_pointer(mutex) + cpu_number()) % real_ncpus;
1990
1991 prev_owner = mutex->lck_mtx_owner;
1992
1993 /*
1994 * Spin while:
1995 * - mutex is locked, and
1996 * - it's locked as a spin lock, and
1997 * - owner is running on another processor, and
1998 * - we haven't spun for long enough.
1999 */
2000 do {
2001 /*
2002 * Try to acquire the lock.
2003 */
2004 if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
2005 retval = LCK_MTX_SPINWAIT_ACQUIRED;
2006 break;
2007 }
2008
2009 cur_time = mach_absolute_time();
2010
2011 /*
2012 * Never spin past high_deadline.
2013 */
2014 if (cur_time >= high_deadline) {
2015 retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
2016 break;
2017 }
2018
2019 /*
2020 * Check if owner is on core. If not block.
2021 */
2022 owner = mutex->lck_mtx_owner;
2023 if (owner) {
2024 uint32_t i = prev_owner_cpu;
2025 bool owner_on_core = false;
2026
2027 disable_preemption();
2028 owner = mutex->lck_mtx_owner;
2029
2030 /*
2031 * For scalability we want to check if the owner is on core
2032 * without locking the mutex interlock.
2033 * If we do not lock the mutex interlock, the owner that we see might be
2034 * invalid, so we cannot dereference it. Therefore we cannot check
2035 * any field of the thread to tell us if it is on core.
2036 * Check if the thread that is running on the other cpus matches the owner.
2037 */
2038 if (owner) {
2039 thread_t owner_thread = ctid_get_thread_unsafe(owner);
2040
2041 do {
2042 if ((cpu_data_ptr[i] != NULL) && (cpu_data_ptr[i]->cpu_active_thread == owner_thread)) {
2043 owner_on_core = true;
2044 break;
2045 }
2046 if (++i >= real_ncpus) {
2047 i = 0;
2048 }
2049 } while (i != prev_owner_cpu);
2050 }
2051
2052 enable_preemption();
2053
2054 if (owner == 0) {
2055 /* nothing to do, we didn't find an owner */
2056 } else if (owner_on_core) {
2057 prev_owner_cpu = i;
2058 } else {
2059 prev_owner = owner;
2060 owner = mutex->lck_mtx_owner;
2061 if (owner == prev_owner) {
2062 /*
2063 * Owner is not on core.
2064 * Stop spinning.
2065 */
2066 if (loopcount == 0) {
2067 retval = LCK_MTX_SPINWAIT_NO_SPIN;
2068 } else {
2069 retval = LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE;
2070 }
2071 break;
2072 }
2073 /*
2074 * Fall through if the owner changed while we were scanning.
2075 * The new owner could potentially be on core, so loop
2076 * again.
2077 */
2078 }
2079 }
2080
2081 /*
2082 * Save how many times we see the owner changing.
2083 * We can roughly estimate the mutex hold
2084 * time and the fairness with that.
2085 */
2086 if (owner != prev_owner) {
2087 prev_owner = owner;
2088 total_hold_time_samples++;
2089 window_hold_time_samples++;
2090 }
2091
2092 /*
2093 * Learning window expired.
2094 * Try to adjust the sliding_deadline.
2095 */
2096 if (cur_time >= window_deadline) {
2097 /*
2098 * If there was not contention during the window
2099 * stop spinning.
2100 */
2101 if (window_hold_time_samples < 1) {
2102 retval = LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION;
2103 break;
2104 }
2105
2106 if (adjust) {
2107 /*
2108 * For a fair lock, we'd wait for at most (NCPU-1) periods,
2109 * but the lock is unfair, so let's try to estimate by how much.
2110 */
2111 unfairness = total_hold_time_samples / real_ncpus;
2112
2113 if (unfairness == 0) {
2114 /*
2115 * We observed the owner changing `total_hold_time_samples` times which
2116 * let us estimate the average hold time of this mutex for the duration
2117 * of the spin time.
2118 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
2119 *
2120 * In this case spin at max avg_hold_time * (real_ncpus - 1)
2121 */
2122 delta = cur_time - start_time;
2123 sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
2124 } else {
2125 /*
2126 * In this case at least one of the other cpus was able to get the lock twice
2127 * while I was spinning.
2128 * We could spin longer but it won't necessarily help if the system is unfair.
2129 * Try to randomize the wait to reduce contention.
2130 *
2131 * We compute how much time we could potentially spin
2132 * and distribute it over the cpus.
2133 *
2134 * bias is an integer between 0 and real_ncpus.
2135 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
2136 */
2137 delta = high_deadline - cur_time;
2138 sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
2139 adjust = FALSE;
2140 }
2141 }
2142
2143 window_deadline += low_MutexSpin;
2144 window_hold_time_samples = 0;
2145 }
2146
2147 /*
2148 * Stop spinning if we past
2149 * the adjusted deadline.
2150 */
2151 if (cur_time >= sliding_deadline) {
2152 retval = LCK_MTX_SPINWAIT_SPUN_SLIDING_THR;
2153 break;
2154 }
2155
2156 if (mutex->lck_mtx_owner != 0) {
2157 cpu_pause();
2158 }
2159
2160 loopcount++;
2161 } while (TRUE);
2162
2163 LCK_MTX_ADAPTIVE_SPIN_END(mutex, mutex->lck_mtx_grp, spin_start);
2164
2165 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2166 trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
2167 mutex->lck_mtx_waiters, retval, 0);
2168
2169 return retval;
2170 }
2171
2172
2173
2174 /*
2175 * Routine: lck_mtx_lock_wait_x86
2176 *
2177 * Invoked in order to wait on contention.
2178 *
2179 * Called with the interlock locked and
2180 * preemption disabled...
2181 * returns it unlocked and with preemption enabled
2182 *
2183 * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
2184 * A runnable waiter can exist between wait and acquire
2185 * without a waiters count being set.
2186 * This allows us to never make a spurious wakeup call.
2187 *
2188 * Priority:
2189 * This avoids taking the thread lock if the owning thread is the same priority.
2190 * This optimizes the case of same-priority threads contending on a lock.
2191 * However, that allows the owning thread to drop in priority while holding the lock,
2192 * because there is no state that the priority change can notice that
2193 * says that the targeted thread holds a contended mutex.
2194 *
2195 * One possible solution: priority changes could look for some atomic tag
2196 * on the thread saying 'holding contended lock', and then set up a promotion.
2197 * Needs a story for dropping that promotion - the last contended unlock
2198 * has to notice that this has happened.
2199 */
__attribute__((noinline))
void
lck_mtx_lock_wait_x86(
	lck_mtx_t *mutex,
	struct turnstile **ts)
{
	thread_t self = current_thread();
	uint64_t sleep_start;

	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

	/* Snapshot the current owner; it is the turnstile inheritor below. */
	thread_t holder = ctid_get_thread(mutex->lck_mtx_owner);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder),
	    mutex->lck_mtx_waiters, 0, 0);
	sleep_start = LCK_MTX_BLOCK_BEGIN();

	/*
	 * Safe to increment without atomics: we are called with the
	 * interlock held (see the routine header above).
	 */
	mutex->lck_mtx_waiters++;

	/*
	 * lck_mtx_lock_wait_x86 might be called on a loop. Call prepare just once and reuse
	 * the same turnstile while looping, the matching turnstile complete will be called
	 * by lck_mtx_lock_contended when finally acquiring the lock.
	 */
	if (*ts == NULL) {
		*ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
	}

	struct turnstile *turnstile = *ts;
	/* Tag this thread with the reason it is about to block (kernel-mutex wait). */
	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	/*
	 * Make the owner inherit from us. TURNSTILE_DELAYED_UPDATE defers the
	 * actual propagation to turnstile_update_inheritor_complete() below,
	 * which runs after the interlock has been dropped.
	 */
	turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	/*
	 * Queue ourselves on the turnstile waitq BEFORE releasing the interlock,
	 * so a concurrently-unlocking owner cannot issue a wakeup we would miss.
	 */
	waitq_assert_wait64(&turnstile->ts_waitq, LCK_MTX_EVENT(mutex), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	lck_mtx_ilk_unlock(mutex);

	turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

	/* Sleep until the unlocking thread wakes us (uninterruptible wait). */
	thread_block(THREAD_CONTINUE_NULL);

	/*
	 * NOTE(review): reading lck_mtx_owner here is racy by design — the
	 * "_unsafe" accessor is used purely for tracing.
	 */
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
	    mutex->lck_mtx_waiters, 0, 0);

	LCK_MTX_BLOCK_END(mutex, mutex->lck_mtx_grp, sleep_start);
}
2247
2248 /*
2249 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2250 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2251 * Returns: TRUE if lock is acquired.
2252 */
2253 boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t * lck)2254 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2255 {
2256 if (not_in_kdp) {
2257 panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
2258 }
2259
2260 if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
2261 return TRUE;
2262 }
2263
2264 return FALSE;
2265 }
2266
2267 void
kdp_lck_mtx_find_owner(__unused struct waitq * waitq,event64_t event,thread_waitinfo_t * waitinfo)2268 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2269 {
2270 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2271 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2272 waitinfo->owner = thread_tid(ctid_get_thread(mutex->lck_mtx_owner));
2273 }
2274
2275 #endif /* LCK_MTX_USE_ARCH */
2276 #if CONFIG_PV_TICKET
2277 __startup_func
2278 void
lck_init_pv(void)2279 lck_init_pv(void)
2280 {
2281 uint32_t pvtck = 1;
2282 PE_parse_boot_argn("pvticket", &pvtck, sizeof(pvtck));
2283 if (pvtck == 0) {
2284 return;
2285 }
2286 has_lock_pv = cpuid_vmm_present() &&
2287 (cpuid_vmm_get_kvm_features() & CPUID_KVM_FEATURE_PV_UNHALT) != 0;
2288 }
2289 STARTUP(LOCKS, STARTUP_RANK_FIRST, lck_init_pv);
2290 #endif
2291